Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
Enhanced partition statistics: documentation update
Enhanced partition statistics: remove old partition statistics
Enhanced partition statistics: procfs
Enhanced partition statistics: sysfs
Enhanced partition statistics: aoe fix
Enhanced partition statistics: update partition statistics
Enhanced partition statistics: core statistics
block: fixup rq_init() a bit

Manually fixed conflict in drivers/block/aoe/aoecmd.c due to statistics
support.

+320 -48
+22
Documentation/ABI/testing/procfs-diskstats
··· 1 + What: /proc/diskstats 2 + Date: February 2008 3 + Contact: Jerome Marchand <jmarchan@redhat.com> 4 + Description: 5 + The /proc/diskstats file displays the I/O statistics 6 + of block devices. Each line contains the following 14 7 + fields: 8 + 1 - major number 9 + 2 - minor number 10 + 3 - device name 11 + 4 - reads completed successfully 12 + 5 - reads merged 13 + 6 - sectors read 14 + 7 - time spent reading (ms) 15 + 8 - writes completed 16 + 9 - writes merged 17 + 10 - sectors written 18 + 11 - time spent writing (ms) 19 + 12 - I/Os currently in progress 20 + 13 - time spent doing I/Os (ms) 21 + 14 - weighted time spent doing I/Os (ms) 22 + For more details refer to Documentation/iostats.txt
+28
Documentation/ABI/testing/sysfs-block
··· 1 + What: /sys/block/<disk>/stat 2 + Date: February 2008 3 + Contact: Jerome Marchand <jmarchan@redhat.com> 4 + Description: 5 + The /sys/block/<disk>/stat files display the I/O 6 + statistics of disk <disk>. They contain 11 fields: 7 + 1 - reads completed successfully 8 + 2 - reads merged 9 + 3 - sectors read 10 + 4 - time spent reading (ms) 11 + 5 - writes completed 12 + 6 - writes merged 13 + 7 - sectors written 14 + 8 - time spent writing (ms) 15 + 9 - I/Os currently in progress 16 + 10 - time spent doing I/Os (ms) 17 + 11 - weighted time spent doing I/Os (ms) 18 + For more details refer to Documentation/iostats.txt 19 + 20 + 21 + What: /sys/block/<disk>/<part>/stat 22 + Date: February 2008 23 + Contact: Jerome Marchand <jmarchan@redhat.com> 24 + Description: 25 + The /sys/block/<disk>/<part>/stat files display the 26 + I/O statistics of partition <part>. The format is the 27 + same as the above-written /sys/block/<disk>/stat 28 + format.
+14 -1
Documentation/iostats.txt
··· 58 58 Each set of stats only applies to the indicated device; if you want 59 59 system-wide stats you'll have to find all the devices and sum them all up. 60 60 61 - Field 1 -- # of reads issued 61 + Field 1 -- # of reads completed 62 62 This is the total number of reads completed successfully. 63 63 Field 2 -- # of reads merged, field 6 -- # of writes merged 64 64 Reads and writes which are adjacent to each other may be merged for ··· 131 131 words, the number of reads for partitions is counted slightly before time 132 132 of queuing for partitions, and at completion for whole disks. This is 133 133 a subtle distinction that is probably uninteresting for most cases. 134 + 135 + More significant is the error induced by counting the numbers of 136 + reads/writes before merges for partitions and after for disks. Since a 137 + typical workload usually contains a lot of successive and adjacent requests, 138 + the number of reads/writes issued can be several times higher than the 139 + number of reads/writes completed. 140 + 141 + In 2.6.25, the full statistic set is again available for partitions and 142 + disk and partition statistics are consistent again. Since we still don't 143 + keep record of the partition-relative address, an operation is attributed to 144 + the partition which contains the first sector of the request after the 145 + eventual merges. As requests can be merged across partitions, this could lead 146 + to some (probably insignificant) inaccuracy. 134 147 135 148 Additional notes 136 149 ----------------
+51 -18
block/blk-core.c
··· 60 60 return; 61 61 62 62 if (!new_io) { 63 - __disk_stat_inc(rq->rq_disk, merges[rw]); 63 + __all_stat_inc(rq->rq_disk, merges[rw], rq->sector); 64 64 } else { 65 + struct hd_struct *part = get_part(rq->rq_disk, rq->sector); 65 66 disk_round_stats(rq->rq_disk); 66 67 rq->rq_disk->in_flight++; 68 + if (part) { 69 + part_round_stats(part); 70 + part->in_flight++; 71 + } 67 72 } 68 73 } 69 74 ··· 107 102 } 108 103 EXPORT_SYMBOL(blk_get_backing_dev_info); 109 104 105 + /* 106 + * We can't just memset() the structure, since the allocation path 107 + * already stored some information in the request. 108 + */ 110 109 void rq_init(struct request_queue *q, struct request *rq) 111 110 { 112 111 INIT_LIST_HEAD(&rq->queuelist); 113 112 INIT_LIST_HEAD(&rq->donelist); 114 - 115 - rq->errors = 0; 113 + rq->q = q; 114 + rq->sector = rq->hard_sector = (sector_t) -1; 115 + rq->nr_sectors = rq->hard_nr_sectors = 0; 116 + rq->current_nr_sectors = rq->hard_cur_sectors = 0; 116 117 rq->bio = rq->biotail = NULL; 117 118 INIT_HLIST_NODE(&rq->hash); 118 119 RB_CLEAR_NODE(&rq->rb_node); 119 - rq->ioprio = 0; 120 - rq->buffer = NULL; 121 - rq->ref_count = 1; 122 - rq->q = q; 123 - rq->special = NULL; 124 - rq->data_len = 0; 125 - rq->data = NULL; 120 + rq->rq_disk = NULL; 126 121 rq->nr_phys_segments = 0; 122 + rq->nr_hw_segments = 0; 123 + rq->ioprio = 0; 124 + rq->special = NULL; 125 + rq->buffer = NULL; 126 + rq->tag = -1; 127 + rq->errors = 0; 128 + rq->ref_count = 1; 129 + rq->cmd_len = 0; 130 + memset(rq->cmd, 0, sizeof(rq->cmd)); 131 + rq->data_len = 0; 132 + rq->sense_len = 0; 133 + rq->data = NULL; 127 134 rq->sense = NULL; 128 135 rq->end_io = NULL; 129 136 rq->end_io_data = NULL; 130 - rq->completion_data = NULL; 131 137 rq->next_rq = NULL; 132 138 } 133 139 ··· 1002 986 } 1003 987 EXPORT_SYMBOL_GPL(disk_round_stats); 1004 988 989 + void part_round_stats(struct hd_struct *part) 990 + { 991 + unsigned long now = jiffies; 992 + 993 + if (now == part->stamp) 994 + return; 995 + 
996 + if (part->in_flight) { 997 + __part_stat_add(part, time_in_queue, 998 + part->in_flight * (now - part->stamp)); 999 + __part_stat_add(part, io_ticks, (now - part->stamp)); 1000 + } 1001 + part->stamp = now; 1002 + } 1003 + 1005 1004 /* 1006 1005 * queue lock must be held 1007 1006 */ ··· 1219 1188 1220 1189 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1221 1190 struct hd_struct *p = bdev->bd_part; 1222 - const int rw = bio_data_dir(bio); 1223 - 1224 - p->sectors[rw] += bio_sectors(bio); 1225 - p->ios[rw]++; 1226 1191 1227 1192 bio->bi_sector += p->start_sect; 1228 1193 bio->bi_bdev = bdev->bd_contains; ··· 1546 1519 if (blk_fs_request(req) && req->rq_disk) { 1547 1520 const int rw = rq_data_dir(req); 1548 1521 1549 - disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); 1522 + all_stat_add(req->rq_disk, sectors[rw], 1523 + nr_bytes >> 9, req->sector); 1550 1524 } 1551 1525 1552 1526 total_bytes = bio_nbytes = 0; ··· 1732 1704 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { 1733 1705 unsigned long duration = jiffies - req->start_time; 1734 1706 const int rw = rq_data_dir(req); 1707 + struct hd_struct *part = get_part(disk, req->sector); 1735 1708 1736 - __disk_stat_inc(disk, ios[rw]); 1737 - __disk_stat_add(disk, ticks[rw], duration); 1709 + __all_stat_inc(disk, ios[rw], req->sector); 1710 + __all_stat_add(disk, ticks[rw], duration, req->sector); 1738 1711 disk_round_stats(disk); 1739 1712 disk->in_flight--; 1713 + if (part) { 1714 + part_round_stats(part); 1715 + part->in_flight--; 1716 + } 1740 1717 } 1741 1718 1742 1719 if (req->end_io)
+6
block/blk-merge.c
··· 454 454 elv_merge_requests(q, req, next); 455 455 456 456 if (req->rq_disk) { 457 + struct hd_struct *part 458 + = get_part(req->rq_disk, req->sector); 457 459 disk_round_stats(req->rq_disk); 458 460 req->rq_disk->in_flight--; 461 + if (part) { 462 + part_round_stats(part); 463 + part->in_flight--; 464 + } 459 465 } 460 466 461 467 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+22 -6
block/genhd.c
··· 584 584 for (n = 0; n < gp->minors - 1; n++) { 585 585 struct hd_struct *hd = gp->part[n]; 586 586 587 - if (hd && hd->nr_sects) 588 - seq_printf(s, "%4d %4d %s %u %u %u %u\n", 589 - gp->major, n + gp->first_minor + 1, 590 - disk_name(gp, n + 1, buf), 591 - hd->ios[0], hd->sectors[0], 592 - hd->ios[1], hd->sectors[1]); 587 + if (!hd || !hd->nr_sects) 588 + continue; 589 + 590 + preempt_disable(); 591 + part_round_stats(hd); 592 + preempt_enable(); 593 + seq_printf(s, "%4d %4d %s %lu %lu %llu " 594 + "%u %lu %lu %llu %u %u %u %u\n", 595 + gp->major, n + gp->first_minor + 1, 596 + disk_name(gp, n + 1, buf), 597 + part_stat_read(hd, ios[0]), 598 + part_stat_read(hd, merges[0]), 599 + (unsigned long long)part_stat_read(hd, sectors[0]), 600 + jiffies_to_msecs(part_stat_read(hd, ticks[0])), 601 + part_stat_read(hd, ios[1]), 602 + part_stat_read(hd, merges[1]), 603 + (unsigned long long)part_stat_read(hd, sectors[1]), 604 + jiffies_to_msecs(part_stat_read(hd, ticks[1])), 605 + hd->in_flight, 606 + jiffies_to_msecs(part_stat_read(hd, io_ticks)), 607 + jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 608 + ); 593 609 } 594 610 595 611 return 0;
+6 -6
drivers/block/aoe/aoecmd.c
··· 751 751 } 752 752 753 753 static inline void 754 - diskstats(struct gendisk *disk, struct bio *bio, ulong duration) 754 + diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector) 755 755 { 756 756 unsigned long n_sect = bio->bi_size >> 9; 757 757 const int rw = bio_data_dir(bio); 758 758 759 - disk_stat_inc(disk, ios[rw]); 760 - disk_stat_add(disk, ticks[rw], duration); 761 - disk_stat_add(disk, sectors[rw], n_sect); 762 - disk_stat_add(disk, io_ticks, duration); 759 + all_stat_inc(disk, ios[rw], sector); 760 + all_stat_add(disk, ticks[rw], duration, sector); 761 + all_stat_add(disk, sectors[rw], n_sect, sector); 762 + all_stat_add(disk, io_ticks, duration, sector); 763 763 } 764 764 765 765 void ··· 879 879 } 880 880 881 881 if (buf && --buf->nframesout == 0 && buf->resid == 0) { 882 - diskstats(d->gd, buf->bio, jiffies - buf->stime); 882 + diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector); 883 883 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; 884 884 bio_endio(buf->bio, n); 885 885 mempool_free(buf, d->bufpool);
+26 -5
fs/partitions/check.c
··· 18 18 #include <linux/fs.h> 19 19 #include <linux/kmod.h> 20 20 #include <linux/ctype.h> 21 + #include <linux/genhd.h> 21 22 22 23 #include "check.h" 23 24 ··· 216 215 { 217 216 struct hd_struct *p = dev_to_part(dev); 218 217 219 - return sprintf(buf, "%8u %8llu %8u %8llu\n", 220 - p->ios[0], (unsigned long long)p->sectors[0], 221 - p->ios[1], (unsigned long long)p->sectors[1]); 218 + preempt_disable(); 219 + part_round_stats(p); 220 + preempt_enable(); 221 + return sprintf(buf, 222 + "%8lu %8lu %8llu %8u " 223 + "%8lu %8lu %8llu %8u " 224 + "%8u %8u %8u" 225 + "\n", 226 + part_stat_read(p, ios[READ]), 227 + part_stat_read(p, merges[READ]), 228 + (unsigned long long)part_stat_read(p, sectors[READ]), 229 + jiffies_to_msecs(part_stat_read(p, ticks[READ])), 230 + part_stat_read(p, ios[WRITE]), 231 + part_stat_read(p, merges[WRITE]), 232 + (unsigned long long)part_stat_read(p, sectors[WRITE]), 233 + jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 234 + p->in_flight, 235 + jiffies_to_msecs(part_stat_read(p, io_ticks)), 236 + jiffies_to_msecs(part_stat_read(p, time_in_queue))); 222 237 } 223 238 224 239 #ifdef CONFIG_FAIL_MAKE_REQUEST ··· 290 273 static void part_release(struct device *dev) 291 274 { 292 275 struct hd_struct *p = dev_to_part(dev); 276 + free_part_stats(p); 293 277 kfree(p); 294 278 } 295 279 ··· 330 312 disk->part[part-1] = NULL; 331 313 p->start_sect = 0; 332 314 p->nr_sects = 0; 333 - p->ios[0] = p->ios[1] = 0; 334 - p->sectors[0] = p->sectors[1] = 0; 315 + part_stat_set_all(p, 0); 335 316 kobject_put(p->holder_dir); 336 317 device_del(&p->dev); 337 318 put_device(&p->dev); ··· 353 336 if (!p) 354 337 return; 355 338 339 + if (!init_part_stats(p)) { 340 + kfree(p); 341 + return; 342 + } 356 343 p->start_sect = start; 357 344 p->nr_sects = len; 358 345 p->partno = part;
+3 -1
include/linux/blkdev.h
··· 137 137 #define BLK_MAX_CDB 16 138 138 139 139 /* 140 - * try to put the fields that are referenced together in the same cacheline 140 + * try to put the fields that are referenced together in the same cacheline. 141 + * if you modify this structure, be sure to check block/blk-core.c:rq_init() 142 + * as well! 141 143 */ 142 144 struct request { 143 145 struct list_head queuelist;
+142 -11
include/linux/genhd.h
··· 91 91 __le32 nr_sects; /* nr of sectors in partition */ 92 92 } __attribute__((packed)); 93 93 94 + struct disk_stats { 95 + unsigned long sectors[2]; /* READs and WRITEs */ 96 + unsigned long ios[2]; 97 + unsigned long merges[2]; 98 + unsigned long ticks[2]; 99 + unsigned long io_ticks; 100 + unsigned long time_in_queue; 101 + }; 102 + 94 103 struct hd_struct { 95 104 sector_t start_sect; 96 105 sector_t nr_sects; 97 106 struct device dev; 98 107 struct kobject *holder_dir; 99 - unsigned ios[2], sectors[2]; /* READs and WRITEs */ 100 108 int policy, partno; 101 109 #ifdef CONFIG_FAIL_MAKE_REQUEST 102 110 int make_it_fail; 111 + #endif 112 + unsigned long stamp; 113 + int in_flight; 114 + #ifdef CONFIG_SMP 115 + struct disk_stats *dkstats; 116 + #else 117 + struct disk_stats dkstats; 103 118 #endif 104 119 }; 105 120 ··· 126 111 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 127 112 #define GENHD_FL_FAIL 64 128 113 129 - struct disk_stats { 130 - unsigned long sectors[2]; /* READs and WRITEs */ 131 - unsigned long ios[2]; 132 - unsigned long merges[2]; 133 - unsigned long ticks[2]; 134 - unsigned long io_ticks; 135 - unsigned long time_in_queue; 136 - }; 137 - 114 + 138 115 struct gendisk { 139 116 int major; /* major number of driver */ 140 117 int first_minor; ··· 165 158 * The __ variants should only be called in critical sections. The full 166 159 * variants disable/enable preemption. 
167 160 */ 161 + static inline struct hd_struct *get_part(struct gendisk *gendiskp, 162 + sector_t sector) 163 + { 164 + struct hd_struct *part; 165 + int i; 166 + for (i = 0; i < gendiskp->minors - 1; i++) { 167 + part = gendiskp->part[i]; 168 + if (part && part->start_sect <= sector 169 + && sector < part->start_sect + part->nr_sects) 170 + return part; 171 + } 172 + return NULL; 173 + } 174 + 168 175 #ifdef CONFIG_SMP 169 176 #define __disk_stat_add(gendiskp, field, addnd) \ 170 177 (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) ··· 198 177 memset(per_cpu_ptr(gendiskp->dkstats, i), value, 199 178 sizeof (struct disk_stats)); 200 179 } 180 + 181 + #define __part_stat_add(part, field, addnd) \ 182 + (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) 183 + 184 + #define __all_stat_add(gendiskp, field, addnd, sector) \ 185 + ({ \ 186 + struct hd_struct *part = get_part(gendiskp, sector); \ 187 + if (part) \ 188 + __part_stat_add(part, field, addnd); \ 189 + __disk_stat_add(gendiskp, field, addnd); \ 190 + }) 191 + 192 + #define part_stat_read(part, field) \ 193 + ({ \ 194 + typeof(part->dkstats->field) res = 0; \ 195 + int i; \ 196 + for_each_possible_cpu(i) \ 197 + res += per_cpu_ptr(part->dkstats, i)->field; \ 198 + res; \ 199 + }) 200 + 201 + static inline void part_stat_set_all(struct hd_struct *part, int value) { 202 + int i; 203 + for_each_possible_cpu(i) 204 + memset(per_cpu_ptr(part->dkstats, i), value, 205 + sizeof(struct disk_stats)); 206 + } 201 207 202 208 #else 203 209 #define __disk_stat_add(gendiskp, field, addnd) \ 204 210 (gendiskp->dkstats.field += addnd) 205 211 #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) 206 212 207 - static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { 213 + static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) 214 + { 208 215 memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); 209 216 } 217 + 218 + #define 
__part_stat_add(part, field, addnd) \ 219 + (part->dkstats.field += addnd) 220 + 221 + #define __all_stat_add(gendiskp, field, addnd, sector) \ 222 + ({ \ 223 + struct hd_struct *part = get_part(gendiskp, sector); \ 224 + if (part) \ 225 + part->dkstats.field += addnd; \ 226 + __disk_stat_add(gendiskp, field, addnd); \ 227 + }) 228 + 229 + #define part_stat_read(part, field) (part->dkstats.field) 230 + 231 + static inline void part_stat_set_all(struct hd_struct *part, int value) 232 + { 233 + memset(&part->dkstats, value, sizeof(struct disk_stats)); 234 + } 235 + 210 236 #endif 211 237 212 238 #define disk_stat_add(gendiskp, field, addnd) \ ··· 274 206 #define disk_stat_sub(gendiskp, field, subnd) \ 275 207 disk_stat_add(gendiskp, field, -subnd) 276 208 209 + #define part_stat_add(gendiskp, field, addnd) \ 210 + do { \ 211 + preempt_disable(); \ 212 + __part_stat_add(gendiskp, field, addnd);\ 213 + preempt_enable(); \ 214 + } while (0) 215 + 216 + #define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) 217 + #define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) 218 + 219 + #define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) 220 + #define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) 221 + 222 + #define __part_stat_sub(gendiskp, field, subnd) \ 223 + __part_stat_add(gendiskp, field, -subnd) 224 + #define part_stat_sub(gendiskp, field, subnd) \ 225 + part_stat_add(gendiskp, field, -subnd) 226 + 227 + #define all_stat_add(gendiskp, field, addnd, sector) \ 228 + do { \ 229 + preempt_disable(); \ 230 + __all_stat_add(gendiskp, field, addnd, sector); \ 231 + preempt_enable(); \ 232 + } while (0) 233 + 234 + #define __all_stat_dec(gendiskp, field, sector) \ 235 + __all_stat_add(gendiskp, field, -1, sector) 236 + #define all_stat_dec(gendiskp, field, sector) \ 237 + all_stat_add(gendiskp, field, -1, sector) 238 + 239 + #define __all_stat_inc(gendiskp, field, sector) \ 240 + 
__all_stat_add(gendiskp, field, 1, sector) 241 + #define all_stat_inc(gendiskp, field, sector) \ 242 + all_stat_add(gendiskp, field, 1, sector) 243 + 244 + #define __all_stat_sub(gendiskp, field, subnd, sector) \ 245 + __all_stat_add(gendiskp, field, -subnd, sector) 246 + #define all_stat_sub(gendiskp, field, subnd, sector) \ 247 + all_stat_add(gendiskp, field, -subnd, sector) 277 248 278 249 /* Inlines to alloc and free disk stats in struct gendisk */ 279 250 #ifdef CONFIG_SMP ··· 328 221 { 329 222 free_percpu(disk->dkstats); 330 223 } 224 + 225 + static inline int init_part_stats(struct hd_struct *part) 226 + { 227 + part->dkstats = alloc_percpu(struct disk_stats); 228 + if (!part->dkstats) 229 + return 0; 230 + return 1; 231 + } 232 + 233 + static inline void free_part_stats(struct hd_struct *part) 234 + { 235 + free_percpu(part->dkstats); 236 + } 237 + 331 238 #else /* CONFIG_SMP */ 332 239 static inline int init_disk_stats(struct gendisk *disk) 333 240 { ··· 351 230 static inline void free_disk_stats(struct gendisk *disk) 352 231 { 353 232 } 233 + 234 + static inline int init_part_stats(struct hd_struct *part) 235 + { 236 + return 1; 237 + } 238 + 239 + static inline void free_part_stats(struct hd_struct *part) 240 + { 241 + } 354 242 #endif /* CONFIG_SMP */ 355 243 356 244 /* drivers/block/ll_rw_blk.c */ 357 245 extern void disk_round_stats(struct gendisk *disk); 246 + extern void part_round_stats(struct hd_struct *part); 358 247 359 248 /* drivers/block/genhd.c */ 360 249 extern int get_blkdev_list(char *, int);