Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: scheduler debugging, core

scheduler debugging core: implement /proc/sched_debug and
/proc/<PID>/sched files for scheduler debugging.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

+364
+69
fs/proc/base.c
··· 929 929 }; 930 930 #endif 931 931 932 + #ifdef CONFIG_SCHED_DEBUG 933 + /* 934 + * Print out various scheduling related per-task fields: 935 + */ 936 + static int sched_show(struct seq_file *m, void *v) 937 + { 938 + struct inode *inode = m->private; 939 + struct task_struct *p; 940 + 941 + WARN_ON(!inode); 942 + 943 + p = get_proc_task(inode); 944 + if (!p) 945 + return -ESRCH; 946 + proc_sched_show_task(p, m); 947 + 948 + put_task_struct(p); 949 + 950 + return 0; 951 + } 952 + 953 + static ssize_t 954 + sched_write(struct file *file, const char __user *buf, 955 + size_t count, loff_t *offset) 956 + { 957 + struct inode *inode = file->f_path.dentry->d_inode; 958 + struct task_struct *p; 959 + 960 + WARN_ON(!inode); 961 + 962 + p = get_proc_task(inode); 963 + if (!p) 964 + return -ESRCH; 965 + proc_sched_set_task(p); 966 + 967 + put_task_struct(p); 968 + 969 + return count; 970 + } 971 + 972 + static int sched_open(struct inode *inode, struct file *filp) 973 + { 974 + int ret; 975 + 976 + ret = single_open(filp, sched_show, NULL); 977 + if (!ret) { 978 + struct seq_file *m = filp->private_data; 979 + 980 + m->private = inode; 981 + } 982 + return ret; 983 + } 984 + 985 + static const struct file_operations proc_pid_sched_operations = { 986 + .open = sched_open, 987 + .read = seq_read, 988 + .write = sched_write, 989 + .llseek = seq_lseek, 990 + .release = seq_release, 991 + }; 992 + 993 + #endif 994 + 932 995 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 933 996 { 934 997 struct inode *inode = dentry->d_inode; ··· 2026 1963 INF("environ", S_IRUSR, pid_environ), 2027 1964 INF("auxv", S_IRUSR, pid_auxv), 2028 1965 INF("status", S_IRUGO, pid_status), 1966 + #ifdef CONFIG_SCHED_DEBUG 1967 + REG("sched", S_IRUGO|S_IWUSR, pid_sched), 1968 + #endif 2029 1969 INF("cmdline", S_IRUGO, pid_cmdline), 2030 1970 INF("stat", S_IRUGO, tgid_stat), 2031 1971 INF("statm", S_IRUGO, pid_statm), ··· 2313 2247 INF("environ", S_IRUSR, pid_environ), 2314 2248 INF("auxv", S_IRUSR, pid_auxv), 2315 2249 INF("status", S_IRUGO, pid_status), 2250 + #ifdef CONFIG_SCHED_DEBUG 2251 + REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2252 + #endif 2316 2253 INF("cmdline", S_IRUGO, pid_cmdline), 2317 2254 INF("stat", S_IRUGO, tid_stat), 2318 2255 INF("statm", S_IRUGO, pid_statm),
+20
include/linux/sched.h
··· 132 132 extern unsigned long nr_iowait(void); 133 133 extern unsigned long weighted_cpuload(const int cpu); 134 134 135 + struct seq_file; 136 + struct cfs_rq; 137 + #ifdef CONFIG_SCHED_DEBUG 138 + extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); 139 + extern void proc_sched_set_task(struct task_struct *p); 140 + extern void 141 + print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now); 142 + #else 143 + static inline void 144 + proc_sched_show_task(struct task_struct *p, struct seq_file *m) 145 + { 146 + } 147 + static inline void proc_sched_set_task(struct task_struct *p) 148 + { 149 + } 150 + static inline void 151 + print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) 152 + { 153 + } 154 + #endif 135 155 136 156 /* 137 157 * Task state bitmask. NOTE! These bits are also
+275
kernel/sched_debug.c
··· 1 + /* 2 + * kernel/time/sched_debug.c 3 + * 4 + * Print the CFS rbtree 5 + * 6 + * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License version 2 as 10 + * published by the Free Software Foundation. 11 + */ 12 + 13 + #include <linux/proc_fs.h> 14 + #include <linux/sched.h> 15 + #include <linux/seq_file.h> 16 + #include <linux/kallsyms.h> 17 + #include <linux/utsname.h> 18 + 19 + /* 20 + * This allows printing both to /proc/sched_debug and 21 + * to the console 22 + */ 23 + #define SEQ_printf(m, x...) \ 24 + do { \ 25 + if (m) \ 26 + seq_printf(m, x); \ 27 + else \ 28 + printk(x); \ 29 + } while (0) 30 + 31 + static void 32 + print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now) 33 + { 34 + if (rq->curr == p) 35 + SEQ_printf(m, "R"); 36 + else 37 + SEQ_printf(m, " "); 38 + 39 + SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d " 40 + "%15Ld %15Ld %15Ld %15Ld %15Ld\n", 41 + p->comm, p->pid, 42 + (long long)p->se.fair_key, 43 + (long long)(p->se.fair_key - rq->cfs.fair_clock), 44 + (long long)p->se.wait_runtime, 45 + (long long)(p->nvcsw + p->nivcsw), 46 + p->prio, 47 + (long long)p->se.sum_exec_runtime, 48 + (long long)p->se.sum_wait_runtime, 49 + (long long)p->se.sum_sleep_runtime, 50 + (long long)p->se.wait_runtime_overruns, 51 + (long long)p->se.wait_runtime_underruns); 52 + } 53 + 54 + static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) 55 + { 56 + struct task_struct *g, *p; 57 + 58 + SEQ_printf(m, 59 + "\nrunnable tasks:\n" 60 + " task PID tree-key delta waiting" 61 + " switches prio" 62 + " sum-exec sum-wait sum-sleep" 63 + " wait-overrun wait-underrun\n" 64 + "------------------------------------------------------------------" 65 + "----------------" 66 + "------------------------------------------------" 67 + "--------------------------------\n"); 68 + 69 + read_lock_irq(&tasklist_lock); 70 + 71 + do_each_thread(g, p) { 72 + if (!p->se.on_rq || task_cpu(p) != rq_cpu) 73 + continue; 74 + 75 + print_task(m, rq, p, now); 76 + } while_each_thread(g, p); 77 + 78 + read_unlock_irq(&tasklist_lock); 79 + } 80 + 81 + static void 82 + print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) 83 + { 84 + s64 wait_runtime_rq_sum = 0; 85 + struct task_struct *p; 86 + struct rb_node *curr; 87 + unsigned long flags; 88 + struct rq *rq = &per_cpu(runqueues, cpu); 89 + 90 + spin_lock_irqsave(&rq->lock, flags); 91 + curr = first_fair(cfs_rq); 92 + while (curr) { 93 + p = rb_entry(curr, struct task_struct, se.run_node); 94 + wait_runtime_rq_sum += p->se.wait_runtime; 95 + 96 + curr = rb_next(curr); 97 + } 98 + spin_unlock_irqrestore(&rq->lock, flags); 99 + 100 + SEQ_printf(m, " .%-30s: %Ld\n", "wait_runtime_rq_sum", 101 + (long long)wait_runtime_rq_sum); 102 + } 103 + 104 + void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) 105 + { 106 + SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); 107 + 108 + #define P(x) \ 109 + SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) 110 + 111 + P(fair_clock); 112 + P(exec_clock); 113 + P(wait_runtime); 114 + P(wait_runtime_overruns); 115 + P(wait_runtime_underruns); 116 + P(sleeper_bonus); 117 + #undef P 118 + 119 + print_cfs_rq_runtime_sum(m, cpu, cfs_rq); 120 + } 121 + 122 + static void print_cpu(struct seq_file *m, int cpu, u64 now) 123 + { 124 + struct rq *rq = &per_cpu(runqueues, cpu); 125 + 126 + #ifdef CONFIG_X86 127 + { 128 + unsigned int freq = cpu_khz ? : 1; 129 + 130 + SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n", 131 + cpu, freq / 1000, (freq % 1000)); 132 + } 133 + #else 134 + SEQ_printf(m, "\ncpu#%d\n", cpu); 135 + #endif 136 + 137 + #define P(x) \ 138 + SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x)) 139 + 140 + P(nr_running); 141 + SEQ_printf(m, " .%-30s: %lu\n", "load", 142 + rq->ls.load.weight); 143 + P(ls.delta_fair); 144 + P(ls.delta_exec); 145 + P(nr_switches); 146 + P(nr_load_updates); 147 + P(nr_uninterruptible); 148 + SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies); 149 + P(next_balance); 150 + P(curr->pid); 151 + P(clock); 152 + P(prev_clock_raw); 153 + P(clock_warps); 154 + P(clock_overflows); 155 + P(clock_unstable_events); 156 + P(clock_max_delta); 157 + P(cpu_load[0]); 158 + P(cpu_load[1]); 159 + P(cpu_load[2]); 160 + P(cpu_load[3]); 161 + P(cpu_load[4]); 162 + #undef P 163 + 164 + print_cfs_stats(m, cpu, now); 165 + 166 + print_rq(m, rq, cpu, now); 167 + } 168 + 169 + static int sched_debug_show(struct seq_file *m, void *v) 170 + { 171 + u64 now = ktime_to_ns(ktime_get()); 172 + int cpu; 173 + 174 + SEQ_printf(m, "Sched Debug Version: v0.04, cfs-v20, %s %.*s\n", 175 + init_utsname()->release, 176 + (int)strcspn(init_utsname()->version, " "), 177 + init_utsname()->version); 178 + 179 + SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); 180 + 181 + for_each_online_cpu(cpu) 182 + print_cpu(m, cpu, now); 183 + 184 + SEQ_printf(m, "\n"); 185 + 186 + return 0; 187 + } 188 + 189 + void sysrq_sched_debug_show(void) 190 + { 191 + sched_debug_show(NULL, NULL); 192 + } 193 + 194 + static int sched_debug_open(struct inode *inode, struct file *filp) 195 + { 196 + return single_open(filp, sched_debug_show, NULL); 197 + } 198 + 199 + static struct file_operations sched_debug_fops = { 200 + .open = sched_debug_open, 201 + .read = seq_read, 202 + .llseek = seq_lseek, 203 + .release = seq_release, 204 + }; 205 + 206 + static int __init init_sched_debug_procfs(void) 207 + { 208 + struct proc_dir_entry *pe; 209 + 210 + pe = create_proc_entry("sched_debug", 0644, NULL); 211 + if (!pe) 212 + return -ENOMEM; 213 + 214 + pe->proc_fops = &sched_debug_fops; 215 + 216 + return 0; 217 + } 218 + 219 + __initcall(init_sched_debug_procfs); 220 + 221 + void proc_sched_show_task(struct task_struct *p, struct seq_file *m) 222 + { 223 + unsigned long flags; 224 + int num_threads = 1; 225 + 226 + rcu_read_lock(); 227 + if (lock_task_sighand(p, &flags)) { 228 + num_threads = atomic_read(&p->signal->count); 229 + unlock_task_sighand(p, &flags); 230 + } 231 + rcu_read_unlock(); 232 + 233 + SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); 234 + SEQ_printf(m, "----------------------------------------------\n"); 235 + #define P(F) \ 236 + SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) 237 + 238 + P(se.wait_start); 239 + P(se.wait_start_fair); 240 + P(se.exec_start); 241 + P(se.sleep_start); 242 + P(se.sleep_start_fair); 243 + P(se.block_start); 244 + P(se.sleep_max); 245 + P(se.block_max); 246 + P(se.exec_max); 247 + P(se.wait_max); 248 + P(se.wait_runtime); 249 + P(se.wait_runtime_overruns); 250 + P(se.wait_runtime_underruns); 251 + P(se.sum_wait_runtime); 252 + P(se.sum_exec_runtime); 253 + SEQ_printf(m, "%-25s:%20Ld\n", 254 + "nr_switches", (long long)(p->nvcsw + p->nivcsw)); 255 + P(se.load.weight); 256 + P(policy); 257 + P(prio); 258 + #undef P 259 + 260 + { 261 + u64 t0, t1; 262 + 263 + t0 = sched_clock(); 264 + t1 = sched_clock(); 265 + SEQ_printf(m, "%-25s:%20Ld\n", 266 + "clock-delta", (long long)(t1-t0)); 267 + } 268 + } 269 + 270 + void proc_sched_set_task(struct task_struct *p) 271 + { 272 + p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0; 273 + p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0; 274 + p->se.sum_exec_runtime = 0; 275 + }