Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
cciss: fix cciss_revalidate panic
block: max hardware sectors limit wrapper
block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead
blk-throttle: Correct the placement of smp_rmb()
blk-throttle: Trim/adjust slice_end once a bio has been dispatched
block: check for proper length of iov entries earlier in blk_rq_map_user_iov()
drbd: fix for spin_lock_irqsave in endio callback
drbd: don't recvmsg with zero length

+85 -73
+3 -2
block/blk-map.c
···
 	for (i = 0; i < iov_count; i++) {
 		unsigned long uaddr = (unsigned long)iov[i].iov_base;
 
+		if (!iov[i].iov_len)
+			return -EINVAL;
+
 		if (uaddr & queue_dma_alignment(q)) {
 			unaligned = 1;
 			break;
 		}
-		if (!iov[i].iov_len)
-			return -EINVAL;
 	}
 
 	if (unaligned || (q->dma_pad_mask & len) || map_data)
+3 -3
block/blk-merge.c
···
 		return 0;
 
 	fbio = bio;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
···
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
-	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (!blk_queue_cluster(q))
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
···
 	int nsegs, cluster;
 
 	nsegs = 0;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 
 	/*
 	 * for each bio in rq
+22 -29
block/blk-settings.c
···
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
-	lim->no_cluster = 0;
+	lim->cluster = 1;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
···
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q:  the request queue for the device
+ * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
+ * @limits: the queue limits
  * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
  * Description:
···
  *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  *    The soft limit can not exceed max_hw_sectors.
  **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
 {
 	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
···
 		       __func__, max_hw_sectors);
 	}
 
-	q->limits.max_hw_sectors = max_hw_sectors;
-	q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
-		BLK_DEF_MAX_SECTORS);
+	limits->max_hw_sectors = max_hw_sectors;
+	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+		BLK_DEF_MAX_SECTORS);
+}
+EXPORT_SYMBOL(blk_limits_max_hw_sectors);
+
+/**
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q:  the request queue for the device
+ * @max_hw_sectors:  max hardware sectors in the usual 512b unit
+ *
+ * Description:
+ *    See description for blk_limits_max_hw_sectors().
+ **/
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+	blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
···
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	blk_stack_limits(&t->limits, &b->limits, 0);
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(t->queue_lock, flags);
-		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
···
 	t->io_min = max(t->io_min, b->io_min);
 	t->io_opt = lcm(t->io_opt, b->io_opt);
 
-	t->no_cluster |= b->no_cluster;
+	t->cluster &= b->cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
 	/* Physical block size a multiple of the logical block size? */
···
 			 sector_t offset)
 {
 	struct request_queue *t = disk->queue;
-	struct request_queue *b = bdev_get_queue(bdev);
 
 	if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
 		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
···
 
 		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
 		       top, bottom);
-	}
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(t->queue_lock, flags);
-		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
 	}
 }
 EXPORT_SYMBOL(disk_stack_limits);
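Note: blk_queue_max_hw_sectors() is now a thin wrapper around the new blk_limits_max_hw_sectors(), which works on a bare struct queue_limits rather than a request_queue. A minimal sketch of the two entry points follows; it is not part of this merge, and the "example_" names and the 1024-sector cap are made up for illustration. Stacking code that only has a queue_limits, such as the dm-table.c hunk further down, uses the new helper.

#include <linux/blkdev.h>

#define EXAMPLE_MAX_HW_SECTORS	1024	/* assumed hardware cap: 512 KB per request */

/* Driver that already owns a request_queue: interface unchanged. */
static void example_setup_queue(struct request_queue *q)
{
	blk_queue_max_hw_sectors(q, EXAMPLE_MAX_HW_SECTORS);
}

/* Code that only has a queue_limits, e.g. while assembling stacked limits. */
static void example_shape_limits(struct queue_limits *limits)
{
	blk_limits_max_hw_sectors(limits, EXAMPLE_MAX_HW_SECTORS);
}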
+1 -1
block/blk-sysfs.c
···
 
 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
 {
-	if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (blk_queue_cluster(q))
 		return queue_var_show(queue_max_segment_size(q), (page));
 
 	return queue_var_show(PAGE_CACHE_SIZE, (page));
+25 -14
block/blk-throttle.c
···
 			tg->slice_end[rw], jiffies);
 }
 
+static inline void throtl_set_slice_end(struct throtl_data *td,
+		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+{
+	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+}
+
 static inline void throtl_extend_slice(struct throtl_data *td,
 		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
 {
···
 	 */
 	if (throtl_slice_used(td, tg, rw))
 		return;
+
+	/*
+	 * A bio has been dispatched. Also adjust slice_end. It might happen
+	 * that initially cgroup limit was very low resulting in high
+	 * slice_end, but later limit was bumped up and bio was dispached
+	 * sooner, then we need to reduce slice_end. A high bogus slice_end
+	 * is bad because it does not allow new slice to start.
+	 */
+
+	throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice);
 
 	time_elapsed = jiffies - tg->slice_start[rw];
 
···
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	/*
-	 * Make sure atomic_inc() effects from
-	 * throtl_update_blkio_group_read_bps(), group of functions are
-	 * visible.
-	 * Is this required or smp_mb__after_atomic_inc() was suffcient
-	 * after the atomic_inc().
-	 */
-	smp_rmb();
 	if (!atomic_read(&td->limits_changed))
 		return;
 
 	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
 
-	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		/*
-		 * Do I need an smp_rmb() here to make sure tg->limits_changed
-		 * update is visible. I am relying on smp_rmb() at the
-		 * beginning of function and not putting a new one here.
-		 */
+	/*
+	 * Make sure updates from throtl_update_blkio_group_read_bps() group
+	 * of functions to tg->limits_changed are visible. We do not
+	 * want update td->limits_changed to be visible but update to
+	 * tg->limits_changed not being visible yet on this cpu. Hence
+	 * the read barrier.
+	 */
+	smp_rmb();
 
+	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
 		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
 			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
 				" riops=%u wiops=%u", tg->bps[READ],
+2
drivers/block/cciss.c
···
 	InquiryData_struct *inq_buff = NULL;
 
 	for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
+		if (!h->drv[logvol])
+			continue;
 		if (memcmp(h->drv[logvol]->LunID, drv->LunID,
 			sizeof(drv->LunID)) == 0) {
 			FOUND = 1;
+8 -6
drivers/block/drbd/drbd_receiver.c
···
 	}
 
 	shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
-	rv = drbd_recv(mdev, &header->h80.payload, shs);
-	if (unlikely(rv != shs)) {
-		dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
-		goto err_out;
-	}
-
 	if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
 		dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
 		goto err_out;
+	}
+
+	if (shs) {
+		rv = drbd_recv(mdev, &header->h80.payload, shs);
+		if (unlikely(rv != shs)) {
+			dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
+			goto err_out;
+		}
 	}
 
 	rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
+2 -1
drivers/block/drbd/drbd_req.h
···
 }
 
 /* completion of master bio is outside of spinlock.
- * If you need it irqsave, do it your self! */
+ * If you need it irqsave, do it your self!
+ * Which means: don't use from bio endio callback. */
 static inline int req_mod(struct drbd_request *req,
 		enum drbd_req_event what)
 {
+9 -1
drivers/block/drbd/drbd_worker.c
···
  */
 void drbd_endio_pri(struct bio *bio, int error)
 {
+	unsigned long flags;
 	struct drbd_request *req = bio->bi_private;
 	struct drbd_conf *mdev = req->mdev;
+	struct bio_and_error m;
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
···
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(error);
 
-	req_mod(req, what);
+	/* not req_mod(), we need irqsave here! */
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	__req_mod(req, what, &m);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
 }
 
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+2 -8
drivers/md/dm-table.c
···
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		limits->max_sectors =
-			min_not_zero(limits->max_sectors,
-				     (unsigned int) (PAGE_SIZE >> 9));
+		blk_limits_max_hw_sectors(limits,
+					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
···
 	 * Copy table's limits to the DM device's request_queue
 	 */
 	q->limits = *limits;
-
-	if (limits->no_cluster)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
-	else
-		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	if (!dm_table_supports_discards(t))
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
-3
drivers/md/md.c
···
 		goto abort;
 	mddev->queue->queuedata = mddev;
 
-	/* Can be unlocked because the queue is new: no concurrency */
-	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
 	blk_queue_make_request(mddev->queue, md_make_request);
 
 	disk = alloc_disk(1 << shift);
+1 -2
drivers/scsi/scsi_lib.c
···
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
-	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+		q->limits.cluster = 0;
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the
+7 -3
include/linux/blkdev.h
···
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
-	unsigned char		no_cluster;
+	unsigned char		cluster;
 	signed char		discard_zeroes_data;
 };
···
 #endif
 };
 
-#define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
 #define QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
···
 #define QUEUE_FLAG_SECDISCARD	19	/* supports SECDISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
-				 (1 << QUEUE_FLAG_CLUSTER) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_ADD_RANDOM))
···
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)		((rq)->cmd_flags & 1)
+
+static inline unsigned int blk_queue_cluster(struct request_queue *q)
+{
+	return q->limits.cluster;
+}
 
 /*
  * We regard a request as sync, if either a read or a sync write
···
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
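With QUEUE_FLAG_CLUSTER gone, the conversion for other callers is mechanical: a test of the old queue flag becomes a call to the new blk_queue_cluster() helper, or a direct assignment to limits.cluster where the code owns the limits (as in the scsi_lib.c hunk above). A minimal sketch mirroring the blk-sysfs.c change; the function name below is made up for illustration.

#include <linux/blkdev.h>

static unsigned int example_max_segment_bytes(struct request_queue *q)
{
	/* was: test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags) */
	if (blk_queue_cluster(q))
		return queue_max_segment_size(q);

	/* no clustering: a segment never spans more than one page */
	return PAGE_CACHE_SIZE;
}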