Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cciss: fix cciss_revalidate panic
  block: max hardware sectors limit wrapper
  block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead
  blk-throttle: Correct the placement of smp_rmb()
  blk-throttle: Trim/adjust slice_end once a bio has been dispatched
  block: check for proper length of iov entries earlier in blk_rq_map_user_iov()
  drbd: fix for spin_lock_irqsave in endio callback
  drbd: don't recvmsg with zero length
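
Two of the block-core entries above change how per-queue properties are expressed: segment clustering moves from the QUEUE_FLAG_CLUSTER queue flag into struct queue_limits and is read back through the new blk_queue_cluster() helper, so stacked devices inherit it via blk_stack_limits() instead of toggling queue flags under the queue lock. The stand-alone C sketch below is only a trimmed-down model of that interface for illustration, not the kernel code itself; the structs carry just the one field relevant here.

    #include <stdio.h>

    /* Trimmed-down model of the kernel structures touched by this merge;
     * the real struct queue_limits / request_queue carry many more fields. */
    struct queue_limits {
            unsigned char cluster;  /* 1: adjacent segments may be merged */
    };

    struct request_queue {
            struct queue_limits limits;
    };

    /* Mirrors the helper added to include/linux/blkdev.h in this merge. */
    static inline unsigned int blk_queue_cluster(struct request_queue *q)
    {
            return q->limits.cluster;
    }

    int main(void)
    {
            /* Default is cluster = 1, as set by blk_set_default_limits(). */
            struct request_queue q = { .limits = { .cluster = 1 } };

            /* What scsi_lib.c now does instead of clearing QUEUE_FLAG_CLUSTER. */
            int use_clustering = 0;         /* stands in for shost->use_clustering */
            if (!use_clustering)
                    q.limits.cluster = 0;

            /* Callers such as blk-merge.c simply test the limit. */
            printf("clustering %s\n",
                   blk_queue_cluster(&q) ? "enabled" : "disabled");
            return 0;
    }

Because the property now lives in the limits, stacking drivers (dm, md) no longer need the removed queue_lock dance; blk_stack_limits() simply does t->cluster &= b->cluster, as the blk-settings.c hunks below show.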

+85 -73
+3 -2
block/blk-map.c
···
         for (i = 0; i < iov_count; i++) {
                 unsigned long uaddr = (unsigned long)iov[i].iov_base;

+                if (!iov[i].iov_len)
+                        return -EINVAL;
+
                 if (uaddr & queue_dma_alignment(q)) {
                         unaligned = 1;
                         break;
                 }
-                if (!iov[i].iov_len)
-                        return -EINVAL;
         }

         if (unaligned || (q->dma_pad_mask & len) || map_data)
+3 -3
block/blk-merge.c
···
                 return 0;

         fbio = bio;
-        cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+        cluster = blk_queue_cluster(q);
         seg_size = 0;
         nr_phys_segs = 0;
         for_each_bio(bio) {
···
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
                                    struct bio *nxt)
 {
-        if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+        if (!blk_queue_cluster(q))
                 return 0;

         if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
···
         int nsegs, cluster;

         nsegs = 0;
-        cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+        cluster = blk_queue_cluster(q);

         /*
          * for each bio in rq
+22 -29
block/blk-settings.c
···
         lim->alignment_offset = 0;
         lim->io_opt = 0;
         lim->misaligned = 0;
-        lim->no_cluster = 0;
+        lim->cluster = 1;
 }
 EXPORT_SYMBOL(blk_set_default_limits);

···
 EXPORT_SYMBOL(blk_queue_bounce_limit);

 /**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q: the request queue for the device
+ * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
+ * @limits: the queue limits
  * @max_hw_sectors: max hardware sectors in the usual 512b unit
  *
  * Description:
···
  * per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  * The soft limit can not exceed max_hw_sectors.
  **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
 {
         if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
                 max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
···
                        __func__, max_hw_sectors);
         }

-        q->limits.max_hw_sectors = max_hw_sectors;
-        q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
-                                      BLK_DEF_MAX_SECTORS);
+        limits->max_hw_sectors = max_hw_sectors;
+        limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+                                    BLK_DEF_MAX_SECTORS);
+}
+EXPORT_SYMBOL(blk_limits_max_hw_sectors);
+
+/**
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q: the request queue for the device
+ * @max_hw_sectors: max hardware sectors in the usual 512b unit
+ *
+ * Description:
+ *    See description for blk_limits_max_hw_sectors().
+ **/
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+        blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);

···
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
         blk_stack_limits(&t->limits, &b->limits, 0);
-
-        if (!t->queue_lock)
-                WARN_ON_ONCE(1);
-        else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-                unsigned long flags;
-                spin_lock_irqsave(t->queue_lock, flags);
-                queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-                spin_unlock_irqrestore(t->queue_lock, flags);
-        }
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);

···
         t->io_min = max(t->io_min, b->io_min);
         t->io_opt = lcm(t->io_opt, b->io_opt);

-        t->no_cluster |= b->no_cluster;
+        t->cluster &= b->cluster;
         t->discard_zeroes_data &= b->discard_zeroes_data;

         /* Physical block size a multiple of the logical block size? */
···
                        sector_t offset)
 {
         struct request_queue *t = disk->queue;
-        struct request_queue *b = bdev_get_queue(bdev);

         if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
                 char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
···

                 printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
                        top, bottom);
-        }
-
-        if (!t->queue_lock)
-                WARN_ON_ONCE(1);
-        else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-                unsigned long flags;
-
-                spin_lock_irqsave(t->queue_lock, flags);
-                if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-                        queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-                spin_unlock_irqrestore(t->queue_lock, flags);
         }
 }
 EXPORT_SYMBOL(disk_stack_limits);
+1 -1
block/blk-sysfs.c
···

 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
 {
-        if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+        if (blk_queue_cluster(q))
                 return queue_var_show(queue_max_segment_size(q), (page));

         return queue_var_show(PAGE_CACHE_SIZE, (page));
+25 -14
block/blk-throttle.c
···
                         tg->slice_end[rw], jiffies);
 }

+static inline void throtl_set_slice_end(struct throtl_data *td,
+                struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+{
+        tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+}
+
 static inline void throtl_extend_slice(struct throtl_data *td,
                 struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
 {
···
          */
         if (throtl_slice_used(td, tg, rw))
                 return;
+
+        /*
+         * A bio has been dispatched. Also adjust slice_end. It might happen
+         * that initially cgroup limit was very low resulting in high
+         * slice_end, but later limit was bumped up and bio was dispatched
+         * sooner, then we need to reduce slice_end. A high bogus slice_end
+         * is bad because it does not allow new slice to start.
+         */
+
+        throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice);

         time_elapsed = jiffies - tg->slice_start[rw];

···
         struct throtl_grp *tg;
         struct hlist_node *pos, *n;

-        /*
-         * Make sure atomic_inc() effects from
-         * throtl_update_blkio_group_read_bps(), group of functions are
-         * visible.
-         * Is this required or smp_mb__after_atomic_inc() was suffcient
-         * after the atomic_inc().
-         */
-        smp_rmb();
         if (!atomic_read(&td->limits_changed))
                 return;

         throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));

-        hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-                /*
-                 * Do I need an smp_rmb() here to make sure tg->limits_changed
-                 * update is visible. I am relying on smp_rmb() at the
-                 * beginning of function and not putting a new one here.
-                 */
+        /*
+         * Make sure updates from throtl_update_blkio_group_read_bps() group
+         * of functions to tg->limits_changed are visible. We do not
+         * want update td->limits_changed to be visible but update to
+         * tg->limits_changed not being visible yet on this cpu. Hence
+         * the read barrier.
+         */
+        smp_rmb();

+        hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
                 if (throtl_tg_on_rr(tg) && tg->limits_changed) {
                         throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
                                         " riops=%u wiops=%u", tg->bps[READ],
+2
drivers/block/cciss.c
···
         InquiryData_struct *inq_buff = NULL;

         for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
+                if (!h->drv[logvol])
+                        continue;
                 if (memcmp(h->drv[logvol]->LunID, drv->LunID,
                         sizeof(drv->LunID)) == 0) {
                         FOUND = 1;
+8 -6
drivers/block/drbd/drbd_receiver.c
···
         }

         shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
-        rv = drbd_recv(mdev, &header->h80.payload, shs);
-        if (unlikely(rv != shs)) {
-                dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
-                goto err_out;
-        }
-
         if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
                 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
                 goto err_out;
+        }
+
+        if (shs) {
+                rv = drbd_recv(mdev, &header->h80.payload, shs);
+                if (unlikely(rv != shs)) {
+                        dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
+                        goto err_out;
+                }
         }

         rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
+2 -1
drivers/block/drbd/drbd_req.h
···
 }

 /* completion of master bio is outside of spinlock.
- * If you need it irqsave, do it your self! */
+ * If you need it irqsave, do it your self!
+ * Which means: don't use from bio endio callback. */
 static inline int req_mod(struct drbd_request *req,
                 enum drbd_req_event what)
 {
+9 -1
drivers/block/drbd/drbd_worker.c
···
  */
 void drbd_endio_pri(struct bio *bio, int error)
 {
+        unsigned long flags;
         struct drbd_request *req = bio->bi_private;
         struct drbd_conf *mdev = req->mdev;
+        struct bio_and_error m;
         enum drbd_req_event what;
         int uptodate = bio_flagged(bio, BIO_UPTODATE);

···
         bio_put(req->private_bio);
         req->private_bio = ERR_PTR(error);

-        req_mod(req, what);
+        /* not req_mod(), we need irqsave here! */
+        spin_lock_irqsave(&mdev->req_lock, flags);
+        __req_mod(req, what, &m);
+        spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+        if (m.bio)
+                complete_master_bio(mdev, &m);
 }

 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+2 -8
drivers/md/dm-table.c
···
          */

         if (q->merge_bvec_fn && !ti->type->merge)
-                limits->max_sectors =
-                        min_not_zero(limits->max_sectors,
-                                     (unsigned int) (PAGE_SIZE >> 9));
+                blk_limits_max_hw_sectors(limits,
+                                          (unsigned int) (PAGE_SIZE >> 9));
         return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
···
          * Copy table's limits to the DM device's request_queue
          */
         q->limits = *limits;
-
-        if (limits->no_cluster)
-                queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
-        else
-                queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);

         if (!dm_table_supports_discards(t))
                 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
-3
drivers/md/md.c
···
                 goto abort;
         mddev->queue->queuedata = mddev;

-        /* Can be unlocked because the queue is new: no concurrency */
-        queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
         blk_queue_make_request(mddev->queue, md_make_request);

         disk = alloc_disk(1 << shift);
+1 -2
drivers/scsi/scsi_lib.c
···

         blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));

-        /* New queue, no concurrency on queue_flags */
         if (!shost->use_clustering)
-                queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+                q->limits.cluster = 0;

         /*
          * set a reasonable default alignment on word boundaries: the
+7 -3
include/linux/blkdev.h
···

         unsigned char           misaligned;
         unsigned char           discard_misaligned;
-        unsigned char           no_cluster;
+        unsigned char           cluster;
         signed char             discard_zeroes_data;
 };
···
 #endif
 };

-#define QUEUE_FLAG_CLUSTER      0       /* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED       1       /* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED      2       /* queue is stopped */
 #define QUEUE_FLAG_SYNCFULL     3       /* read queue has been filled */
···
 #define QUEUE_FLAG_SECDISCARD   19      /* supports SECDISCARD */

 #define QUEUE_FLAG_DEFAULT      ((1 << QUEUE_FLAG_IO_STAT) |            \
-                                 (1 << QUEUE_FLAG_CLUSTER) |            \
                                  (1 << QUEUE_FLAG_STACKABLE) |          \
                                  (1 << QUEUE_FLAG_SAME_COMP) |          \
                                  (1 << QUEUE_FLAG_ADD_RANDOM))
···
 #define list_entry_rq(ptr)      list_entry((ptr), struct request, queuelist)

 #define rq_data_dir(rq)         ((rq)->cmd_flags & 1)
+
+static inline unsigned int blk_queue_cluster(struct request_queue *q)
+{
+        return q->limits.cluster;
+}

 /*
  * We regard a request as sync, if either a read or a sync write
···
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);