Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

task IO accounting: provide distinct tgid/tid I/O statistics

Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate
parent I/O statistics in /proc/pid/io. This approach follows the same
model used to account per-process and per-thread CPU times.

As a practical application, this makes it possible, for example, to quickly
find the top I/O consumer when a process spawns many child threads that
perform the actual I/O work, because the aggregated I/O statistics can
always be found in /proc/pid/io.

[ Oleg Nesterov points out that we should check that the task is still
alive before we iterate over the threads, but also says that we can do
that fixup on top of this later. - Linus ]

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Matt Heaton <matt@hostmonster.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Andrea Righi and committed by
Linus Torvalds
297c5d92 0c18d7a5

+107 -14
+70 -14
fs/proc/base.c
··· 2376 2376 } 2377 2377 2378 2378 #ifdef CONFIG_TASK_IO_ACCOUNTING 2379 - static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2379 + static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2380 2380 { 2381 + u64 rchar, wchar, syscr, syscw; 2382 + struct task_io_accounting ioac; 2383 + 2384 + if (!whole) { 2385 + rchar = task->rchar; 2386 + wchar = task->wchar; 2387 + syscr = task->syscr; 2388 + syscw = task->syscw; 2389 + memcpy(&ioac, &task->ioac, sizeof(ioac)); 2390 + } else { 2391 + unsigned long flags; 2392 + struct task_struct *t = task; 2393 + rchar = wchar = syscr = syscw = 0; 2394 + memset(&ioac, 0, sizeof(ioac)); 2395 + 2396 + rcu_read_lock(); 2397 + do { 2398 + rchar += t->rchar; 2399 + wchar += t->wchar; 2400 + syscr += t->syscr; 2401 + syscw += t->syscw; 2402 + 2403 + ioac.read_bytes += t->ioac.read_bytes; 2404 + ioac.write_bytes += t->ioac.write_bytes; 2405 + ioac.cancelled_write_bytes += 2406 + t->ioac.cancelled_write_bytes; 2407 + t = next_thread(t); 2408 + } while (t != task); 2409 + rcu_read_unlock(); 2410 + 2411 + if (lock_task_sighand(task, &flags)) { 2412 + struct signal_struct *sig = task->signal; 2413 + 2414 + rchar += sig->rchar; 2415 + wchar += sig->wchar; 2416 + syscr += sig->syscr; 2417 + syscw += sig->syscw; 2418 + 2419 + ioac.read_bytes += sig->ioac.read_bytes; 2420 + ioac.write_bytes += sig->ioac.write_bytes; 2421 + ioac.cancelled_write_bytes += 2422 + sig->ioac.cancelled_write_bytes; 2423 + 2424 + unlock_task_sighand(task, &flags); 2425 + } 2426 + } 2427 + 2381 2428 return sprintf(buffer, 2382 - #ifdef CONFIG_TASK_XACCT 2383 2429 "rchar: %llu\n" 2384 2430 "wchar: %llu\n" 2385 2431 "syscr: %llu\n" 2386 2432 "syscw: %llu\n" 2387 - #endif 2388 2433 "read_bytes: %llu\n" 2389 2434 "write_bytes: %llu\n" 2390 2435 "cancelled_write_bytes: %llu\n", 2391 - #ifdef CONFIG_TASK_XACCT 2392 - (unsigned long long)task->rchar, 2393 - (unsigned long long)task->wchar, 2394 - (unsigned long long)task->syscr, 
2395 - (unsigned long long)task->syscw, 2396 - #endif 2397 - (unsigned long long)task->ioac.read_bytes, 2398 - (unsigned long long)task->ioac.write_bytes, 2399 - (unsigned long long)task->ioac.cancelled_write_bytes); 2436 + (unsigned long long)rchar, 2437 + (unsigned long long)wchar, 2438 + (unsigned long long)syscr, 2439 + (unsigned long long)syscw, 2440 + (unsigned long long)ioac.read_bytes, 2441 + (unsigned long long)ioac.write_bytes, 2442 + (unsigned long long)ioac.cancelled_write_bytes); 2400 2443 } 2401 - #endif 2444 + 2445 + static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2446 + { 2447 + return do_io_accounting(task, buffer, 0); 2448 + } 2449 + 2450 + static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) 2451 + { 2452 + return do_io_accounting(task, buffer, 1); 2453 + } 2454 + #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2402 2455 2403 2456 /* 2404 2457 * Thread groups ··· 2523 2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2524 2471 #endif 2525 2472 #ifdef CONFIG_TASK_IO_ACCOUNTING 2526 - INF("io", S_IRUGO, pid_io_accounting), 2473 + INF("io", S_IRUGO, tgid_io_accounting), 2527 2474 #endif 2528 2475 }; 2529 2476 ··· 2849 2796 #endif 2850 2797 #ifdef CONFIG_FAULT_INJECTION 2851 2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2799 + #endif 2800 + #ifdef CONFIG_TASK_IO_ACCOUNTING 2801 + INF("io", S_IRUGO, tid_io_accounting), 2852 2802 #endif 2853 2803 }; 2854 2804
+4
include/linux/sched.h
··· 506 506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; 507 507 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; 508 508 unsigned long inblock, oublock, cinblock, coublock; 509 + #ifdef CONFIG_TASK_XACCT 510 + u64 rchar, wchar, syscr, syscw; 511 + #endif 512 + struct task_io_accounting ioac; 509 513 510 514 /* 511 515 * Cumulative ns of scheduled CPU time for dead threads in the
+27
kernel/exit.c
··· 120 120 sig->nivcsw += tsk->nivcsw; 121 121 sig->inblock += task_io_get_inblock(tsk); 122 122 sig->oublock += task_io_get_oublock(tsk); 123 + #ifdef CONFIG_TASK_XACCT 124 + sig->rchar += tsk->rchar; 125 + sig->wchar += tsk->wchar; 126 + sig->syscr += tsk->syscr; 127 + sig->syscw += tsk->syscw; 128 + #endif /* CONFIG_TASK_XACCT */ 129 + #ifdef CONFIG_TASK_IO_ACCOUNTING 130 + sig->ioac.read_bytes += tsk->ioac.read_bytes; 131 + sig->ioac.write_bytes += tsk->ioac.write_bytes; 132 + sig->ioac.cancelled_write_bytes += 133 + tsk->ioac.cancelled_write_bytes; 134 + #endif /* CONFIG_TASK_IO_ACCOUNTING */ 123 135 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 124 136 sig = NULL; /* Marker for below. */ 125 137 } ··· 1378 1366 psig->coublock += 1379 1367 task_io_get_oublock(p) + 1380 1368 sig->oublock + sig->coublock; 1369 + #ifdef CONFIG_TASK_XACCT 1370 + psig->rchar += p->rchar + sig->rchar; 1371 + psig->wchar += p->wchar + sig->wchar; 1372 + psig->syscr += p->syscr + sig->syscr; 1373 + psig->syscw += p->syscw + sig->syscw; 1374 + #endif /* CONFIG_TASK_XACCT */ 1375 + #ifdef CONFIG_TASK_IO_ACCOUNTING 1376 + psig->ioac.read_bytes += 1377 + p->ioac.read_bytes + sig->ioac.read_bytes; 1378 + psig->ioac.write_bytes += 1379 + p->ioac.write_bytes + sig->ioac.write_bytes; 1380 + psig->ioac.cancelled_write_bytes += 1381 + p->ioac.cancelled_write_bytes + 1382 + sig->ioac.cancelled_write_bytes; 1383 + #endif /* CONFIG_TASK_IO_ACCOUNTING */ 1381 1384 spin_unlock_irq(&p->parent->sighand->siglock); 1382 1385 } 1383 1386
+6
kernel/fork.c
··· 812 812 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 813 813 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 814 814 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 815 + #ifdef CONFIG_TASK_XACCT 816 + sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0; 817 + #endif 818 + #ifdef CONFIG_TASK_IO_ACCOUNTING 819 + memset(&sig->ioac, 0, sizeof(sig->ioac)); 820 + #endif 815 821 sig->sum_sched_runtime = 0; 816 822 INIT_LIST_HEAD(&sig->cpu_timers[0]); 817 823 INIT_LIST_HEAD(&sig->cpu_timers[1]);