Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: Separate read and write statistics of in_flight requests v2

Commit a9327cac440be4d8333bba975cbbf76045096275 added separate read
and write statistics of in_flight requests, and exported the number
of read and write requests in progress separately through sysfs.

But Corrado Zoccolo <czoccolo@gmail.com> reported getting strange
output from "iostat -kx 2". Global values for service time and
utilization were garbage. For interval values, utilization was always
100%, and service time was higher than normal.

So this was reverted by commit 0f78ab9899e9d6acb09d5465def618704255963b

The problem was in part_round_stats_single(); I missed the following hunk:
if (now == part->stamp)
return;

- if (part->in_flight) {
+ if (part_in_flight(part)) {
__part_stat_add(cpu, part, time_in_queue,
part_in_flight(part) * (now - part->stamp));
__part_stat_add(cpu, part, io_ticks, (now - part->stamp));

With this chunk included, the reported regression gets fixed.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

--
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

authored by

Nikanth Karthikesan and committed by
Jens Axboe
316d315b 23e018a1

+43 -20
+4 -4
block/blk-core.c
··· 70 70 part_stat_inc(cpu, part, merges[rw]); 71 71 else { 72 72 part_round_stats(cpu, part); 73 - part_inc_in_flight(part); 73 + part_inc_in_flight(part, rw); 74 74 } 75 75 76 76 part_stat_unlock(); ··· 1030 1030 if (now == part->stamp) 1031 1031 return; 1032 1032 1033 - if (part->in_flight) { 1033 + if (part_in_flight(part)) { 1034 1034 __part_stat_add(cpu, part, time_in_queue, 1035 - part->in_flight * (now - part->stamp)); 1035 + part_in_flight(part) * (now - part->stamp)); 1036 1036 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 1037 1037 } 1038 1038 part->stamp = now; ··· 1739 1739 part_stat_inc(cpu, part, ios[rw]); 1740 1740 part_stat_add(cpu, part, ticks[rw], duration); 1741 1741 part_round_stats(cpu, part); 1742 - part_dec_in_flight(part); 1742 + part_dec_in_flight(part, rw); 1743 1743 1744 1744 part_stat_unlock(); 1745 1745 }
+1 -1
block/blk-merge.c
··· 351 351 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 352 352 353 353 part_round_stats(cpu, part); 354 - part_dec_in_flight(part); 354 + part_dec_in_flight(part, rq_data_dir(req)); 355 355 356 356 part_stat_unlock(); 357 357 }
+3 -1
block/genhd.c
··· 869 869 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); 870 870 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 871 871 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 872 + static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 872 873 #ifdef CONFIG_FAIL_MAKE_REQUEST 873 874 static struct device_attribute dev_attr_fail = 874 875 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); ··· 889 888 &dev_attr_alignment_offset.attr, 890 889 &dev_attr_capability.attr, 891 890 &dev_attr_stat.attr, 891 + &dev_attr_inflight.attr, 892 892 #ifdef CONFIG_FAIL_MAKE_REQUEST 893 893 &dev_attr_fail.attr, 894 894 #endif ··· 1055 1053 part_stat_read(hd, merges[1]), 1056 1054 (unsigned long long)part_stat_read(hd, sectors[1]), 1057 1055 jiffies_to_msecs(part_stat_read(hd, ticks[1])), 1058 - hd->in_flight, 1056 + part_in_flight(hd), 1059 1057 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1060 1058 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 1061 1059 );
+10 -6
drivers/md/dm.c
··· 130 130 /* 131 131 * A list of ios that arrived while we were suspended. 132 132 */ 133 - atomic_t pending; 133 + atomic_t pending[2]; 134 134 wait_queue_head_t wait; 135 135 struct work_struct work; 136 136 struct bio_list deferred; ··· 453 453 { 454 454 struct mapped_device *md = io->md; 455 455 int cpu; 456 + int rw = bio_data_dir(io->bio); 456 457 457 458 io->start_time = jiffies; 458 459 459 460 cpu = part_stat_lock(); 460 461 part_round_stats(cpu, &dm_disk(md)->part0); 461 462 part_stat_unlock(); 462 - dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); 463 + dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); 463 464 } 464 465 465 466 static void end_io_acct(struct dm_io *io) ··· 480 479 * After this is decremented the bio must not be touched if it is 481 480 * a barrier. 482 481 */ 483 - dm_disk(md)->part0.in_flight = pending = 484 - atomic_dec_return(&md->pending); 482 + dm_disk(md)->part0.in_flight[rw] = pending = 483 + atomic_dec_return(&md->pending[rw]); 484 + pending += atomic_read(&md->pending[rw^0x1]); 485 485 486 486 /* nudge anyone waiting on suspend queue */ 487 487 if (!pending) ··· 1787 1785 if (!md->disk) 1788 1786 goto bad_disk; 1789 1787 1790 - atomic_set(&md->pending, 0); 1788 + atomic_set(&md->pending[0], 0); 1789 + atomic_set(&md->pending[1], 0); 1791 1790 init_waitqueue_head(&md->wait); 1792 1791 INIT_WORK(&md->work, dm_wq_work); 1793 1792 init_waitqueue_head(&md->eventq); ··· 2091 2088 break; 2092 2089 } 2093 2090 spin_unlock_irqrestore(q->queue_lock, flags); 2094 - } else if (!atomic_read(&md->pending)) 2091 + } else if (!atomic_read(&md->pending[0]) && 2092 + !atomic_read(&md->pending[1])) 2095 2093 break; 2096 2094 2097 2095 if (interruptible == TASK_INTERRUPTIBLE &&
+11 -1
fs/partitions/check.c
··· 248 248 part_stat_read(p, merges[WRITE]), 249 249 (unsigned long long)part_stat_read(p, sectors[WRITE]), 250 250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 251 - p->in_flight, 251 + part_in_flight(p), 252 252 jiffies_to_msecs(part_stat_read(p, io_ticks)), 253 253 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 254 + } 255 + 256 + ssize_t part_inflight_show(struct device *dev, 257 + struct device_attribute *attr, char *buf) 258 + { 259 + struct hd_struct *p = dev_to_part(dev); 260 + 261 + return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); 254 262 } 255 263 256 264 #ifdef CONFIG_FAIL_MAKE_REQUEST ··· 289 281 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 290 282 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 291 283 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 284 + static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 292 285 #ifdef CONFIG_FAIL_MAKE_REQUEST 293 286 static struct device_attribute dev_attr_fail = 294 287 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); ··· 301 292 &dev_attr_size.attr, 302 293 &dev_attr_alignment_offset.attr, 303 294 &dev_attr_stat.attr, 295 + &dev_attr_inflight.attr, 304 296 #ifdef CONFIG_FAIL_MAKE_REQUEST 305 297 &dev_attr_fail.attr, 306 298 #endif
+14 -7
include/linux/genhd.h
··· 98 98 int make_it_fail; 99 99 #endif 100 100 unsigned long stamp; 101 - int in_flight; 101 + int in_flight[2]; 102 102 #ifdef CONFIG_SMP 103 103 struct disk_stats *dkstats; 104 104 #else ··· 322 322 #define part_stat_sub(cpu, gendiskp, field, subnd) \ 323 323 part_stat_add(cpu, gendiskp, field, -subnd) 324 324 325 - static inline void part_inc_in_flight(struct hd_struct *part) 325 + static inline void part_inc_in_flight(struct hd_struct *part, int rw) 326 326 { 327 - part->in_flight++; 327 + part->in_flight[rw]++; 328 328 if (part->partno) 329 - part_to_disk(part)->part0.in_flight++; 329 + part_to_disk(part)->part0.in_flight[rw]++; 330 330 } 331 331 332 - static inline void part_dec_in_flight(struct hd_struct *part) 332 + static inline void part_dec_in_flight(struct hd_struct *part, int rw) 333 333 { 334 - part->in_flight--; 334 + part->in_flight[rw]--; 335 335 if (part->partno) 336 - part_to_disk(part)->part0.in_flight--; 336 + part_to_disk(part)->part0.in_flight[rw]--; 337 + } 338 + 339 + static inline int part_in_flight(struct hd_struct *part) 340 + { 341 + return part->in_flight[0] + part->in_flight[1]; 337 342 } 338 343 339 344 /* block/blk-core.c */ ··· 550 545 extern ssize_t part_size_show(struct device *dev, 551 546 struct device_attribute *attr, char *buf); 552 547 extern ssize_t part_stat_show(struct device *dev, 548 + struct device_attribute *attr, char *buf); 549 + extern ssize_t part_inflight_show(struct device *dev, 553 550 struct device_attribute *attr, char *buf); 554 551 #ifdef CONFIG_FAIL_MAKE_REQUEST 555 552 extern ssize_t part_fail_show(struct device *dev,