Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

delayacct: track delays from thrashing cache pages

Delay accounting already measures the time a task spends in direct reclaim
and waiting for swapin, but in low memory situations tasks can spend
a significant amount of their time waiting on thrashing page cache. This
isn't tracked right now.

To know the full impact of memory contention on an individual task,
measure the delay when waiting for a recently evicted active cache page to
read back into memory.

Also update tools/accounting/getdelays.c:

[hannes@computer accounting]$ sudo ./getdelays -d -p 1
print delayacct stats ON
PID 1

CPU count real total virtual total delay total delay average
50318 745000000 847346785 400533713 0.008ms
IO count delay total delay average
435 122601218 0ms
SWAP count delay total delay average
0 0 0ms
RECLAIM count delay total delay average
0 0 0ms
THRASHING count delay total delay average
19 12621439 0ms

Link: http://lkml.kernel.org/r/20180828172258.3185-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Drake <drake@endlessm.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Johannes Weiner and committed by
Linus Torvalds
b1d29ba8 1899ad18

+61 -2
+23
include/linux/delayacct.h
··· 57 57 58 58 u64 freepages_start; 59 59 u64 freepages_delay; /* wait for memory reclaim */ 60 + 61 + u64 thrashing_start; 62 + u64 thrashing_delay; /* wait for thrashing page */ 63 + 60 64 u32 freepages_count; /* total count of memory reclaim */ 65 + u32 thrashing_count; /* total count of thrash waits */ 61 66 }; 62 67 #endif 63 68 ··· 81 76 extern __u64 __delayacct_blkio_ticks(struct task_struct *); 82 77 extern void __delayacct_freepages_start(void); 83 78 extern void __delayacct_freepages_end(void); 79 + extern void __delayacct_thrashing_start(void); 80 + extern void __delayacct_thrashing_end(void); 84 81 85 82 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) 86 83 { ··· 163 156 __delayacct_freepages_end(); 164 157 } 165 158 159 + static inline void delayacct_thrashing_start(void) 160 + { 161 + if (current->delays) 162 + __delayacct_thrashing_start(); 163 + } 164 + 165 + static inline void delayacct_thrashing_end(void) 166 + { 167 + if (current->delays) 168 + __delayacct_thrashing_end(); 169 + } 170 + 166 171 #else 167 172 static inline void delayacct_set_flag(int flag) 168 173 {} ··· 200 181 static inline void delayacct_freepages_start(void) 201 182 {} 202 183 static inline void delayacct_freepages_end(void) 184 + {} 185 + static inline void delayacct_thrashing_start(void) 186 + {} 187 + static inline void delayacct_thrashing_end(void) 203 188 {} 204 189 205 190 #endif /* CONFIG_TASK_DELAY_ACCT */
+5 -1
include/uapi/linux/taskstats.h
··· 34 34 */ 35 35 36 36 37 - #define TASKSTATS_VERSION 8 37 + #define TASKSTATS_VERSION 9 38 38 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN 39 39 * in linux/sched.h */ 40 40 ··· 164 164 /* Delay waiting for memory reclaim */ 165 165 __u64 freepages_count; 166 166 __u64 freepages_delay_total; 167 + 168 + /* Delay waiting for thrashing page */ 169 + __u64 thrashing_count; 170 + __u64 thrashing_delay_total; 167 171 }; 168 172 169 173
+15
kernel/delayacct.c
··· 135 135 d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; 136 136 tmp = d->freepages_delay_total + tsk->delays->freepages_delay; 137 137 d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; 138 + tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; 139 + d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; 138 140 d->blkio_count += tsk->delays->blkio_count; 139 141 d->swapin_count += tsk->delays->swapin_count; 140 142 d->freepages_count += tsk->delays->freepages_count; 143 + d->thrashing_count += tsk->delays->thrashing_count; 141 144 raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); 142 145 143 146 return 0; ··· 172 169 &current->delays->freepages_count); 173 170 } 174 171 172 + void __delayacct_thrashing_start(void) 173 + { 174 + current->delays->thrashing_start = ktime_get_ns(); 175 + } 176 + 177 + void __delayacct_thrashing_end(void) 178 + { 179 + delayacct_end(&current->delays->lock, 180 + &current->delays->thrashing_start, 181 + &current->delays->thrashing_delay, 182 + &current->delays->thrashing_count); 183 + }
+11
mm/filemap.c
··· 36 36 #include <linux/cleancache.h> 37 37 #include <linux/shmem_fs.h> 38 38 #include <linux/rmap.h> 39 + #include <linux/delayacct.h> 39 40 #include "internal.h" 40 41 41 42 #define CREATE_TRACE_POINTS ··· 1074 1073 { 1075 1074 struct wait_page_queue wait_page; 1076 1075 wait_queue_entry_t *wait = &wait_page.wait; 1076 + bool thrashing = false; 1077 1077 int ret = 0; 1078 + 1079 + if (bit_nr == PG_locked && !PageSwapBacked(page) && 1080 + !PageUptodate(page) && PageWorkingset(page)) { 1081 + delayacct_thrashing_start(); 1082 + thrashing = true; 1083 + } 1078 1084 1079 1085 init_wait(wait); 1080 1086 wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0; ··· 1120 1112 } 1121 1113 1122 1114 finish_wait(q, wait); 1115 + 1116 + if (thrashing) 1117 + delayacct_thrashing_end(); 1123 1118 1124 1119 /* 1125 1120 * A signal could leave PageWaiters set. Clearing it here if
+7 -1
tools/accounting/getdelays.c
··· 203 203 "SWAP %15s%15s%15s\n" 204 204 " %15llu%15llu%15llums\n" 205 205 "RECLAIM %12s%15s%15s\n" 206 + " %15llu%15llu%15llums\n" 207 + "THRASHING%12s%15s%15s\n" 206 208 " %15llu%15llu%15llums\n", 207 209 "count", "real total", "virtual total", 208 210 "delay total", "delay average", ··· 224 222 "count", "delay total", "delay average", 225 223 (unsigned long long)t->freepages_count, 226 224 (unsigned long long)t->freepages_delay_total, 227 - average_ms(t->freepages_delay_total, t->freepages_count)); 225 + average_ms(t->freepages_delay_total, t->freepages_count), 226 + "count", "delay total", "delay average", 227 + (unsigned long long)t->thrashing_count, 228 + (unsigned long long)t->thrashing_delay_total, 229 + average_ms(t->thrashing_delay_total, t->thrashing_count)); 228 230 } 229 231 230 232 static void task_context_switch_counts(struct taskstats *t)