Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

per-task-delay-accounting: add memory reclaim delay

Sometimes, application responses become slow under heavy memory load.
Applications take a bit of time to reclaim memory. Statistics on how long
memory reclaim takes will be useful for measuring memory usage.

This patch adds memory reclaim accounting to per-task-delay-accounting,
measuring the time spent in do_try_to_free_pages().

Example:

- When the system is under low memory load,
memory reclaim may not occur.

$ free
total used free shared buffers cached
Mem: 8197800 1577300 6620500 0 4808 1516724
-/+ buffers/cache: 55768 8142032
Swap: 16386292 0 16386292

$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
r b swpd free buff cache si so bi bo in cs us sy id wa
0 0 0 5069748 10612 3014060 0 0 0 0 3 26 0 0 100 0
0 0 0 5069748 10612 3014060 0 0 0 0 4 22 0 0 100 0
0 0 0 5069748 10612 3014060 0 0 0 0 3 18 0 0 100 0

Measure the time of the tar command.

$ ls -s test.dat
1501472 test.dat

$ time tar cvf test.tar test.dat
real 0m13.388s
user 0m0.116s
sys 0m5.304s

$ ./delayget -d -p <pid>
CPU count real total virtual total delay total
428 5528345500 5477116080 62749891
IO count delay total
338 8078977189
SWAP count delay total
0 0
RECLAIM count delay total
0 0

- When the system is under heavy memory load,
memory reclaim may occur.

$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
r b swpd free buff cache si so bi bo in cs us sy id wa
0 0 7159032 49724 1812 3012 0 0 0 0 3 24 0 0 100 0
0 0 7159032 49724 1812 3012 0 0 0 0 4 24 0 0 100 0
0 0 7159032 49848 1812 3012 0 0 0 0 3 22 0 0 100 0

In this case, one process uses more than 8G of memory
by calling malloc() and memset().

$ time tar cvf test.tar test.dat
real 1m38.563s <- increased by 85 sec
user 0m0.140s
sys 0m7.060s

$ ./delayget -d -p <pid>
CPU count real total virtual total delay total
9021 7140446250 7315277975 923201824
IO count delay total
8965 90466349669
SWAP count delay total
3 21036367
RECLAIM count delay total
740 61011951153

In the latter case, the RECLAIM values increase.
So, taskstats can show how much memory reclaim influences turnaround time (TAT).

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujistu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Keika Kobayashi and committed by
Linus Torvalds
873b4771 3e85ba03

+41
+19
include/linux/delayacct.h
··· 39 39 extern void __delayacct_blkio_end(void); 40 40 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); 41 41 extern __u64 __delayacct_blkio_ticks(struct task_struct *); 42 + extern void __delayacct_freepages_start(void); 43 + extern void __delayacct_freepages_end(void); 42 44 43 45 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) 44 46 { ··· 109 107 return 0; 110 108 } 111 109 110 + static inline void delayacct_freepages_start(void) 111 + { 112 + if (current->delays) 113 + __delayacct_freepages_start(); 114 + } 115 + 116 + static inline void delayacct_freepages_end(void) 117 + { 118 + if (current->delays) 119 + __delayacct_freepages_end(); 120 + } 121 + 112 122 #else 113 123 static inline void delayacct_set_flag(int flag) 114 124 {} ··· 143 129 { return 0; } 144 130 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) 145 131 { return 0; } 132 + static inline void delayacct_freepages_start(void) 133 + {} 134 + static inline void delayacct_freepages_end(void) 135 + {} 136 + 146 137 #endif /* CONFIG_TASK_DELAY_ACCT */ 147 138 148 139 #endif
+4
include/linux/sched.h
··· 672 672 /* io operations performed */ 673 673 u32 swapin_count; /* total count of the number of swapin block */ 674 674 /* io operations performed */ 675 + 676 + struct timespec freepages_start, freepages_end; 677 + u64 freepages_delay; /* wait for memory reclaim */ 678 + u32 freepages_count; /* total count of memory reclaim */ 675 679 }; 676 680 #endif /* CONFIG_TASK_DELAY_ACCT */ 677 681
+13
kernel/delayacct.c
··· 165 165 return ret; 166 166 } 167 167 168 + void __delayacct_freepages_start(void) 169 + { 170 + delayacct_start(&current->delays->freepages_start); 171 + } 172 + 173 + void __delayacct_freepages_end(void) 174 + { 175 + delayacct_end(&current->delays->freepages_start, 176 + &current->delays->freepages_end, 177 + &current->delays->freepages_delay, 178 + &current->delays->freepages_count); 179 + } 180 +
+5
mm/vmscan.c
··· 38 38 #include <linux/kthread.h> 39 39 #include <linux/freezer.h> 40 40 #include <linux/memcontrol.h> 41 + #include <linux/delayacct.h> 41 42 42 43 #include <asm/tlbflush.h> 43 44 #include <asm/div64.h> ··· 1317 1316 struct zone *zone; 1318 1317 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); 1319 1318 1319 + delayacct_freepages_start(); 1320 + 1320 1321 if (scan_global_lru(sc)) 1321 1322 count_vm_event(ALLOCSTALL); 1322 1323 /* ··· 1398 1395 } 1399 1396 } else 1400 1397 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority); 1398 + 1399 + delayacct_freepages_end(); 1401 1400 1402 1401 return ret; 1403 1402 }