Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf report: Sort child tasks by tid

Commit 91e467bc568f ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf report
--tasks output now shows child threads in an order determined by the
hashing. For example, in this snippet tid 3 appears after tid 256 even
though they have the same ppid 2:

```
$ perf report --tasks
% pid tid ppid comm
0 0 -1 |swapper
2 2 0 | kthreadd
256 256 2 | kworker/12:1H-k
693761 693761 2 | kworker/10:1-mm
1301762 1301762 2 | kworker/1:1-mm_
1302530 1302530 2 | kworker/u32:0-k
3 3 2 | rcu_gp
...
```

The output is easier to read if threads appear in numerically
increasing order. To allow for this, read all threads into a list, then
sort it with a comparator that orders threads by the tids of the child
tasks of their first common parent. The list creation and deletion are
added as utilities on machine. The indentation is computed by counting
the number of parents a child has.

With this change the output for the same data file now looks like:
```
$ perf report --tasks
% pid tid ppid comm
0 0 -1 |swapper
1 1 0 | systemd
823 823 1 | systemd-journal
853 853 1 | systemd-udevd
3230 3230 1 | systemd-timesyn
3236 3236 1 | auditd
3239 3239 3236 | audisp-syslog
3321 3321 1 | accounts-daemon
...
```

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-2-irogers@google.com

authored by

Ian Rogers and committed by
Namhyung Kim
2f1e20fe 498d3486

+172 -93
+132 -93
tools/perf/builtin-report.c
··· 59 59 #include <linux/ctype.h> 60 60 #include <signal.h> 61 61 #include <linux/bitmap.h> 62 + #include <linux/list_sort.h> 62 63 #include <linux/string.h> 63 64 #include <linux/stringify.h> 64 65 #include <linux/time64.h> ··· 829 828 rep->tool.no_warn = true; 830 829 } 831 830 832 - struct task { 833 - struct thread *thread; 834 - struct list_head list; 835 - struct list_head children; 836 - }; 837 - 838 - static struct task *tasks_list(struct task *task, struct machine *machine) 839 - { 840 - struct thread *parent_thread, *thread = task->thread; 841 - struct task *parent_task; 842 - 843 - /* Already listed. */ 844 - if (!list_empty(&task->list)) 845 - return NULL; 846 - 847 - /* Last one in the chain. */ 848 - if (thread__ppid(thread) == -1) 849 - return task; 850 - 851 - parent_thread = machine__find_thread(machine, -1, thread__ppid(thread)); 852 - if (!parent_thread) 853 - return ERR_PTR(-ENOENT); 854 - 855 - parent_task = thread__priv(parent_thread); 856 - thread__put(parent_thread); 857 - list_add_tail(&task->list, &parent_task->children); 858 - return tasks_list(parent_task, machine); 859 - } 860 - 861 831 struct maps__fprintf_task_args { 862 832 int indent; 863 833 FILE *fp; ··· 872 900 return args.printed; 873 901 } 874 902 875 - static void task__print_level(struct task *task, FILE *fp, int level) 903 + static int thread_level(struct machine *machine, const struct thread *thread) 876 904 { 877 - struct thread *thread = task->thread; 878 - struct task *child; 905 + struct thread *parent_thread; 906 + int res; 907 + 908 + if (thread__tid(thread) <= 0) 909 + return 0; 910 + 911 + if (thread__ppid(thread) <= 0) 912 + return 1; 913 + 914 + parent_thread = machine__find_thread(machine, -1, thread__ppid(thread)); 915 + if (!parent_thread) { 916 + pr_err("Missing parent thread of %d\n", thread__tid(thread)); 917 + return 0; 918 + } 919 + res = 1 + thread_level(machine, parent_thread); 920 + thread__put(parent_thread); 921 + return res; 922 + } 923 + 924 + 
static void task__print_level(struct machine *machine, struct thread *thread, FILE *fp) 925 + { 926 + int level = thread_level(machine, thread); 879 927 int comm_indent = fprintf(fp, " %8d %8d %8d |%*s", 880 928 thread__pid(thread), thread__tid(thread), 881 929 thread__ppid(thread), level, ""); ··· 903 911 fprintf(fp, "%s\n", thread__comm_str(thread)); 904 912 905 913 maps__fprintf_task(thread__maps(thread), comm_indent, fp); 914 + } 906 915 907 - if (!list_empty(&task->children)) { 908 - list_for_each_entry(child, &task->children, list) 909 - task__print_level(child, fp, level + 1); 916 + /* 917 + * Sort two thread list nodes such that they form a tree. The first node is the 918 + * root of the tree, its children are ordered numerically after it. If a child 919 + * has children itself then they appear immediately after their parent. For 920 + * example, the 4 threads in the order they'd appear in the list: 921 + * - init with a TID 1 and a parent of 0 922 + * - systemd with a TID 3000 and a parent of init/1 923 + * - systemd child thread with TID 4000, the parent is 3000 924 + * - NetworkManager is a child of init with a TID of 3500. 925 + */ 926 + static int task_list_cmp(void *priv, const struct list_head *la, const struct list_head *lb) 927 + { 928 + struct machine *machine = priv; 929 + struct thread_list *task_a = list_entry(la, struct thread_list, list); 930 + struct thread_list *task_b = list_entry(lb, struct thread_list, list); 931 + struct thread *a = task_a->thread; 932 + struct thread *b = task_b->thread; 933 + int level_a, level_b, res; 934 + 935 + /* Same thread? */ 936 + if (thread__tid(a) == thread__tid(b)) 937 + return 0; 938 + 939 + /* Compare a and b to root. */ 940 + if (thread__tid(a) == 0) 941 + return -1; 942 + 943 + if (thread__tid(b) == 0) 944 + return 1; 945 + 946 + /* If parents match sort by tid. */ 947 + if (thread__ppid(a) == thread__ppid(b)) 948 + return thread__tid(a) < thread__tid(b) ? 
-1 : 1; 949 + 950 + /* 951 + * Find a and b such that if they are a child of each other a and b's 952 + * tid's match, otherwise a and b have a common parent and distinct 953 + * tid's to sort by. First make the depths of the threads match. 954 + */ 955 + level_a = thread_level(machine, a); 956 + level_b = thread_level(machine, b); 957 + a = thread__get(a); 958 + b = thread__get(b); 959 + for (int i = level_a; i > level_b; i--) { 960 + struct thread *parent = machine__find_thread(machine, -1, thread__ppid(a)); 961 + 962 + thread__put(a); 963 + if (!parent) { 964 + pr_err("Missing parent thread of %d\n", thread__tid(a)); 965 + thread__put(b); 966 + return -1; 967 + } 968 + a = parent; 910 969 } 970 + for (int i = level_b; i > level_a; i--) { 971 + struct thread *parent = machine__find_thread(machine, -1, thread__ppid(b)); 972 + 973 + thread__put(b); 974 + if (!parent) { 975 + pr_err("Missing parent thread of %d\n", thread__tid(b)); 976 + thread__put(a); 977 + return 1; 978 + } 979 + b = parent; 980 + } 981 + /* Search up to a common parent. */ 982 + while (thread__ppid(a) != thread__ppid(b)) { 983 + struct thread *parent; 984 + 985 + parent = machine__find_thread(machine, -1, thread__ppid(a)); 986 + thread__put(a); 987 + if (!parent) 988 + pr_err("Missing parent thread of %d\n", thread__tid(a)); 989 + a = parent; 990 + parent = machine__find_thread(machine, -1, thread__ppid(b)); 991 + thread__put(b); 992 + if (!parent) 993 + pr_err("Missing parent thread of %d\n", thread__tid(b)); 994 + b = parent; 995 + if (!a || !b) { 996 + /* Handle missing parent (unexpected) with some sanity. */ 997 + thread__put(a); 998 + thread__put(b); 999 + return !a && !b ? 0 : (!a ? -1 : 1); 1000 + } 1001 + } 1002 + if (thread__tid(a) == thread__tid(b)) { 1003 + /* a is a child of b or vice-versa, deeper levels appear later. */ 1004 + res = level_a < level_b ? -1 : (level_a > level_b ? 1 : 0); 1005 + } else { 1006 + /* Sort by tid now the parent is the same. 
*/ 1007 + res = thread__tid(a) < thread__tid(b) ? -1 : 1; 1008 + } 1009 + thread__put(a); 1010 + thread__put(b); 1011 + return res; 911 1012 } 912 1013 913 1014 static int tasks_print(struct report *rep, FILE *fp) 914 1015 { 915 - struct perf_session *session = rep->session; 916 - struct machine *machine = &session->machines.host; 917 - struct task *tasks, *task; 918 - unsigned int nr = 0, itask = 0, i; 919 - struct rb_node *nd; 920 - LIST_HEAD(list); 1016 + struct machine *machine = &rep->session->machines.host; 1017 + LIST_HEAD(tasks); 1018 + int ret; 921 1019 922 - /* 923 - * No locking needed while accessing machine->threads, 924 - * because --tasks is single threaded command. 925 - */ 1020 + ret = machine__thread_list(machine, &tasks); 1021 + if (!ret) { 1022 + struct thread_list *task; 926 1023 927 - /* Count all the threads. */ 928 - for (i = 0; i < THREADS__TABLE_SIZE; i++) 929 - nr += machine->threads[i].nr; 1024 + list_sort(machine, &tasks, task_list_cmp); 930 1025 931 - tasks = malloc(sizeof(*tasks) * nr); 932 - if (!tasks) 933 - return -ENOMEM; 1026 + fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm"); 934 1027 935 - for (i = 0; i < THREADS__TABLE_SIZE; i++) { 936 - struct threads *threads = &machine->threads[i]; 937 - 938 - for (nd = rb_first_cached(&threads->entries); nd; 939 - nd = rb_next(nd)) { 940 - task = tasks + itask++; 941 - 942 - task->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread; 943 - INIT_LIST_HEAD(&task->children); 944 - INIT_LIST_HEAD(&task->list); 945 - thread__set_priv(task->thread, task); 946 - } 1028 + list_for_each_entry(task, &tasks, list) 1029 + task__print_level(machine, task->thread, fp); 947 1030 } 948 - 949 - /* 950 - * Iterate every task down to the unprocessed parent 951 - * and link all in task children list. Task with no 952 - * parent is added into 'list'. 
953 - */ 954 - for (itask = 0; itask < nr; itask++) { 955 - task = tasks + itask; 956 - 957 - if (!list_empty(&task->list)) 958 - continue; 959 - 960 - task = tasks_list(task, machine); 961 - if (IS_ERR(task)) { 962 - pr_err("Error: failed to process tasks\n"); 963 - free(tasks); 964 - return PTR_ERR(task); 965 - } 966 - 967 - if (task) 968 - list_add_tail(&task->list, &list); 969 - } 970 - 971 - fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm"); 972 - 973 - list_for_each_entry(task, &list, list) 974 - task__print_level(task, fp, 0); 975 - 976 - free(tasks); 977 - return 0; 1031 + thread_list__delete(&tasks); 1032 + return ret; 978 1033 } 979 1034 980 1035 static int __cmd_report(struct report *rep)
+30
tools/perf/util/machine.c
··· 3261 3261 return rc; 3262 3262 } 3263 3263 3264 + 3265 + static int thread_list_cb(struct thread *thread, void *data) 3266 + { 3267 + struct list_head *list = data; 3268 + struct thread_list *entry = malloc(sizeof(*entry)); 3269 + 3270 + if (!entry) 3271 + return -ENOMEM; 3272 + 3273 + entry->thread = thread__get(thread); 3274 + list_add_tail(&entry->list, list); 3275 + return 0; 3276 + } 3277 + 3278 + int machine__thread_list(struct machine *machine, struct list_head *list) 3279 + { 3280 + return machine__for_each_thread(machine, thread_list_cb, list); 3281 + } 3282 + 3283 + void thread_list__delete(struct list_head *list) 3284 + { 3285 + struct thread_list *pos, *next; 3286 + 3287 + list_for_each_entry_safe(pos, next, list, list) { 3288 + thread__zput(pos->thread); 3289 + list_del(&pos->list); 3290 + free(pos); 3291 + } 3292 + } 3293 + 3264 3294 pid_t machine__get_current_tid(struct machine *machine, int cpu) 3265 3295 { 3266 3296 if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz)
+10
tools/perf/util/machine.h
··· 280 280 int (*fn)(struct thread *thread, void *p), 281 281 void *priv); 282 282 283 + struct thread_list { 284 + struct list_head list; 285 + struct thread *thread; 286 + }; 287 + 288 + /* Make a list of struct thread_list based on threads in the machine. */ 289 + int machine__thread_list(struct machine *machine, struct list_head *list); 290 + /* Free up the nodes within the thread_list list. */ 291 + void thread_list__delete(struct list_head *list); 292 + 283 293 pid_t machine__get_current_tid(struct machine *machine, int cpu); 284 294 int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, 285 295 pid_t tid);