Merge branch 'for-linus' of git://git.kernel.dk/linux-block

* 'for-linus' of git://git.kernel.dk/linux-block:
floppy: use del_timer_sync() in init cleanup
blk-cgroup: be able to remove the record of unplugged device
block: Don't check QUEUE_FLAG_SAME_COMP in __blk_complete_request
mm: Add comment explaining task state setting in bdi_forker_thread()
mm: Cleanup clearing of BDI_pending bit in bdi_forker_thread()
block: simplify force plug flush code a little bit
block: change force plug flush call order
block: Fix queue_flag update when rq_affinity goes from 2 to 1
block: separate priority boosting from REQ_META
block: remove READ_META and WRITE_META
xen-blkback: fixed indentation and comments
xen-blkback: Don't disconnect backend until state switched to XenbusStateClosed.

+16 -21
block/blk-cgroup.c
···
 {
 	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
 	int ret;
-	unsigned long major, minor, temp;
+	unsigned long major, minor;
 	int i = 0;
 	dev_t dev;
-	u64 bps, iops;
+	u64 temp;
 
 	memset(s, 0, sizeof(s));
 
···
 
 	dev = MKDEV(major, minor);
 
-	ret = blkio_check_dev_num(dev);
+	ret = strict_strtoull(s[1], 10, &temp);
 	if (ret)
-		return ret;
+		return -EINVAL;
+
+	/* For rule removal, do not check for device presence. */
+	if (temp) {
+		ret = blkio_check_dev_num(dev);
+		if (ret)
+			return ret;
+	}
 
 	newpn->dev = dev;
 
-	if (s[1] == NULL)
-		return -EINVAL;
-
 	switch (plid) {
 	case BLKIO_POLICY_PROP:
-		ret = strict_strtoul(s[1], 10, &temp);
-		if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
-		    temp > BLKIO_WEIGHT_MAX)
+		if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+		    temp > BLKIO_WEIGHT_MAX)
 			return -EINVAL;
 
 		newpn->plid = plid;
···
 		switch(fileid) {
 		case BLKIO_THROTL_read_bps_device:
 		case BLKIO_THROTL_write_bps_device:
-			ret = strict_strtoull(s[1], 10, &bps);
-			if (ret)
-				return -EINVAL;
-
 			newpn->plid = plid;
 			newpn->fileid = fileid;
-			newpn->val.bps = bps;
+			newpn->val.bps = temp;
 			break;
 		case BLKIO_THROTL_read_iops_device:
 		case BLKIO_THROTL_write_iops_device:
-			ret = strict_strtoull(s[1], 10, &iops);
-			if (ret)
-				return -EINVAL;
-
-			if (iops > THROTL_IOPS_MAX)
+			if (temp > THROTL_IOPS_MAX)
 				return -EINVAL;
 
 			newpn->plid = plid;
 			newpn->fileid = fileid;
-			newpn->val.iops = (unsigned int)iops;
+			newpn->val.iops = (unsigned int)temp;
 			break;
 		}
 		break;
+9 -8
block/blk-core.c
···
  * true if merge was successful, otherwise false.
  */
 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
-			       struct bio *bio)
+			       struct bio *bio, unsigned int *request_count)
 {
 	struct blk_plug *plug;
 	struct request *rq;
···
 	plug = tsk->plug;
 	if (!plug)
 		goto out;
+	*request_count = 0;
 
 	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
 		int el_ret;
+
+		(*request_count)++;
 
 		if (rq->q != q)
 			continue;
···
 	struct blk_plug *plug;
 	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
+	unsigned int request_count = 0;
 
 	/*
 	 * low level driver can indicate that it wants pages above a
···
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
-	if (attempt_plug_merge(current, q, bio))
+	if (attempt_plug_merge(current, q, bio, &request_count))
 		goto out;
 
 	spin_lock_irq(q->queue_lock);
···
 			if (__rq->q != q)
 				plug->should_sort = 1;
 		}
-		list_add_tail(&req->queuelist, &plug->list);
-		plug->count++;
-		drive_stat_acct(req, 1);
-		if (plug->count >= BLK_MAX_REQUEST_COUNT)
+		if (request_count >= BLK_MAX_REQUEST_COUNT)
 			blk_flush_plug_list(plug, false);
+		list_add_tail(&req->queuelist, &plug->list);
+		drive_stat_acct(req, 1);
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
···
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
 	plug->should_sort = 0;
-	plug->count = 0;
 
 	/*
 	 * If this is a nested plug, don't actually assign it. It will be
···
 		return;
 
 	list_splice_init(&plug->list, &list);
-	plug->count = 0;
 
 	if (plug->should_sort) {
 		list_sort(NULL, &list, plug_rq_cmp);
+1 -1
block/blk-softirq.c
···
 	/*
 	 * Select completion CPU
 	 */
-	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
+	if (req->cpu != -1) {
 		ccpu = req->cpu;
 		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
 			ccpu = blk_cpu_to_group(ccpu);
+6 -4
block/blk-sysfs.c
···
 
 	ret = queue_var_store(&val, page, count);
 	spin_lock_irq(q->queue_lock);
-	if (val) {
+	if (val == 2) {
 		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
-		if (val == 2)
-			queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
-	} else {
+		queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+	} else if (val == 1) {
+		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+	} else if (val == 0) {
 		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
 		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
 	}
+10 -10
block/cfq-iosched.c
···
 	unsigned long slice_end;
 	long slice_resid;
 
-	/* pending metadata requests */
-	int meta_pending;
+	/* pending priority requests */
+	int prio_pending;
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
 
···
 	if (rq_is_sync(rq1) != rq_is_sync(rq2))
 		return rq_is_sync(rq1) ? rq1 : rq2;
 
-	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
-		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
+		return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
···
 	cfqq->cfqd->rq_queued--;
 	cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
 					rq_data_dir(rq), rq_is_sync(rq));
-	if (rq->cmd_flags & REQ_META) {
-		WARN_ON(!cfqq->meta_pending);
-		cfqq->meta_pending--;
+	if (rq->cmd_flags & REQ_PRIO) {
+		WARN_ON(!cfqq->prio_pending);
+		cfqq->prio_pending--;
 	}
 }
 
···
 	 * So both queues are sync. Let the new request get disk time if
 	 * it's a metadata request and the current queue is doing regular IO.
 	 */
-	if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
+	if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
 		return true;
 
 	/*
···
 	struct cfq_io_context *cic = RQ_CIC(rq);
 
 	cfqd->rq_queued++;
-	if (rq->cmd_flags & REQ_META)
-		cfqq->meta_pending++;
+	if (rq->cmd_flags & REQ_PRIO)
+		cfqq->prio_pending++;
 
 	cfq_update_io_thinktime(cfqd, cfqq, cic);
 	cfq_update_io_seektime(cfqd, cfqq, rq);
+4 -4
drivers/block/floppy.c
···
 	use_virtual_dma = can_use_virtual_dma & 1;
 	fdc_state[0].address = FDC1;
 	if (fdc_state[0].address == -1) {
-		del_timer(&fd_timeout);
+		del_timer_sync(&fd_timeout);
 		err = -ENODEV;
 		goto out_unreg_region;
 	}
···
 	fdc = 0;		/* reset fdc in case of unexpected interrupt */
 	err = floppy_grab_irq_and_dma();
 	if (err) {
-		del_timer(&fd_timeout);
+		del_timer_sync(&fd_timeout);
 		err = -EBUSY;
 		goto out_unreg_region;
 	}
···
 		user_reset_fdc(-1, FD_RESET_ALWAYS, false);
 	}
 	fdc = 0;
-	del_timer(&fd_timeout);
+	del_timer_sync(&fd_timeout);
 	current_drive = 0;
 	initialized = true;
 	if (have_no_fdc) {
···
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 out_put_disk:
 	while (dr--) {
-		del_timer(&motor_off_timer[dr]);
+		del_timer_sync(&motor_off_timer[dr]);
 		if (disks[dr]->queue)
 			blk_cleanup_queue(disks[dr]->queue);
 		put_disk(disks[dr]);
+1 -1
drivers/block/xen-blkback/common.h
···
 
 #define DRV_PFX "xen-blkback:"
 #define DPRINTK(fmt, args...)				\
-	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",	\
+	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
 		 __func__, __LINE__, ##args)
 
 
+3 -3
drivers/block/xen-blkback/xenbus.c
···
 
 		/*
 		 * Enforce precondition before potential leak point.
-		 * blkif_disconnect() is idempotent.
+		 * xen_blkif_disconnect() is idempotent.
 		 */
 		xen_blkif_disconnect(be->blkif);
 
···
 		break;
 
 	case XenbusStateClosing:
-		xen_blkif_disconnect(be->blkif);
 		xenbus_switch_state(dev, XenbusStateClosing);
 		break;
 
 	case XenbusStateClosed:
+		xen_blkif_disconnect(be->blkif);
 		xenbus_switch_state(dev, XenbusStateClosed);
 		if (xenbus_dev_is_online(dev))
 			break;
 		/* fall through if not online */
 	case XenbusStateUnknown:
-		/* implies blkif_disconnect() via blkback_remove() */
+		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
 		device_unregister(&dev->dev);
 		break;
 
+3
drivers/mmc/card/block.c
···
 	/*
 	 * Reliable writes are used to implement Forced Unit Access and
 	 * REQ_META accesses, and are supported only on MMCs.
+	 *
+	 * XXX: this really needs a good explanation of why REQ_META
+	 * is treated special.
 	 */
 	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
 			  (req->cmd_flags & REQ_META)) &&
+2 -2
fs/ext3/inode.c
···
 		return bh;
 	if (buffer_uptodate(bh))
 		return bh;
-	ll_rw_block(READ_META, 1, &bh);
+	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
 		return bh;
···
 		trace_ext3_load_inode(inode);
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(READ_META, bh);
+		submit_bh(READ | REQ_META | REQ_PRIO, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			ext3_error(inode->i_sb, "ext3_get_inode_loc",
+2 -1
fs/ext3/namei.c
···
 				bh = ext3_getblk(NULL, dir, b++, 0, &err);
 				bh_use[ra_max] = bh;
 				if (bh)
-					ll_rw_block(READ_META, 1, &bh);
+					ll_rw_block(READ | REQ_META | REQ_PRIO,
+						    1, &bh);
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
+2 -2
fs/ext4/inode.c
···
 		return bh;
 	if (buffer_uptodate(bh))
 		return bh;
-	ll_rw_block(READ_META, 1, &bh);
+	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
 		return bh;
···
 		trace_ext4_load_inode(inode);
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(READ_META, bh);
+		submit_bh(READ | REQ_META | REQ_PRIO, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			EXT4_ERROR_INODE_BLOCK(inode, block,
+2 -1
fs/ext4/namei.c
···
 				bh = ext4_getblk(NULL, dir, b++, 0, &err);
 				bh_use[ra_max] = bh;
 				if (bh)
-					ll_rw_block(READ_META, 1, &bh);
+					ll_rw_block(READ | REQ_META | REQ_PRIO,
+						    1, &bh);
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
+2 -2
fs/gfs2/log.c
···
 	bh->b_end_io = end_buffer_write_sync;
 	get_bh(bh);
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
-		submit_bh(WRITE_SYNC | REQ_META, bh);
+		submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
 	else
-		submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
+		submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh);
 	wait_on_buffer(bh);
 
 	if (!buffer_uptodate(bh))
+3 -3
fs/gfs2/meta_io.c
···
 {
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
-	int write_op = REQ_META |
+	int write_op = REQ_META | REQ_PRIO |
 		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
···
 	}
 	bh->b_end_io = end_buffer_read_sync;
 	get_bh(bh);
-	submit_bh(READ_SYNC | REQ_META, bh);
+	submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
 	if (!(flags & DIO_WAIT))
 		return 0;
 
···
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
+		ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh);
 
 	dblock++;
 	extlen--;
+1 -1
fs/gfs2/ops_fstype.c
···
 
 	bio->bi_end_io = end_bio_io_page;
 	bio->bi_private = page;
-	submit_bio(READ_SYNC | REQ_META, bio);
+	submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio);
 	wait_on_page_locked(page);
 	bio_put(bio);
 	if (!PageUptodate(page)) {
+1 -1
fs/gfs2/quota.c
···
 		set_buffer_uptodate(bh);
 
 	if (!buffer_uptodate(bh)) {
-		ll_rw_block(READ_META, 1, &bh);
+		ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh))
 			goto unlock_out;
+4 -2
include/linux/blk_types.h
···
 
 	__REQ_SYNC,		/* request is sync (sync write or read) */
 	__REQ_META,		/* metadata io request */
+	__REQ_PRIO,		/* boost priority in cfq */
 	__REQ_DISCARD,		/* request to discard sectors */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
 
···
 #define REQ_FAILFAST_DRIVER	(1 << __REQ_FAILFAST_DRIVER)
 #define REQ_SYNC		(1 << __REQ_SYNC)
 #define REQ_META		(1 << __REQ_META)
+#define REQ_PRIO		(1 << __REQ_PRIO)
 #define REQ_DISCARD		(1 << __REQ_DISCARD)
 #define REQ_NOIDLE		(1 << __REQ_NOIDLE)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
-	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \
-	 REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
+	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
+	 REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
 #define REQ_RAHEAD		(1 << __REQ_RAHEAD)
-1
include/linux/blkdev.h
···
 	struct list_head list;
 	struct list_head cb_list;
 	unsigned int should_sort;
-	unsigned int count;
 };
 #define BLK_MAX_REQUEST_COUNT 16
 
-2
include/linux/fs.h
···
 #define READA			RWA_MASK
 
 #define READ_SYNC		(READ | REQ_SYNC)
-#define READ_META		(READ | REQ_META)
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
 #define WRITE_ODIRECT		(WRITE | REQ_SYNC)
-#define WRITE_META		(WRITE | REQ_META)
 #define WRITE_FLUSH		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
 #define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
 #define WRITE_FLUSH_FUA	(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
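Taken together with the REQ_PRIO hunks above, removing READ_META and WRITE_META means in-kernel callers now spell out the flag combination themselves. A minimal sketch of the migration as seen in the ext3/ext4/gfs2 hunks (illustrative fragment only, not part of the series; bh stands for whichever buffer_head the caller already holds):

	/* Old style, removed by this series: */
	ll_rw_block(READ_META, 1, &bh);		/* was READ | REQ_META */

	/*
	 * New style: compose READ with REQ_META explicitly, and add REQ_PRIO
	 * where the metadata read should keep the CFQ priority boost that
	 * previously came with REQ_META itself.
	 */
	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);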
+21 -9
mm/backing-dev.c
···
 	return max(5UL * 60 * HZ, interval);
 }
 
+/*
+ * Clear pending bit and wakeup anybody waiting for flusher thread creation or
+ * shutdown
+ */
+static void bdi_clear_pending(struct backing_dev_info *bdi)
+{
+	clear_bit(BDI_pending, &bdi->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&bdi->state, BDI_pending);
+}
+
 static int bdi_forker_thread(void *ptr)
 {
 	struct bdi_writeback *me = ptr;
···
 		}
 
 		spin_lock_bh(&bdi_lock);
+		/*
+		 * In the following loop we are going to check whether we have
+		 * some work to do without any synchronization with tasks
+		 * waking us up to do work for them. So we have to set task
+		 * state already here so that we don't miss wakeups coming
+		 * after we verify some condition.
+		 */
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		list_for_each_entry(bdi, &bdi_list, bdi_list) {
···
 				spin_unlock_bh(&bdi->wb_lock);
 				wake_up_process(task);
 			}
+			bdi_clear_pending(bdi);
 			break;
 
 		case KILL_THREAD:
 			__set_current_state(TASK_RUNNING);
 			kthread_stop(task);
+			bdi_clear_pending(bdi);
 			break;
 
 		case NO_ACTION:
···
 			else
 				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
 			try_to_freeze();
-			/* Back to the main loop */
-			continue;
+			break;
 		}
-
-		/*
-		 * Clear pending bit and wakeup anybody waiting to tear us down.
-		 */
-		clear_bit(BDI_pending, &bdi->state);
-		smp_mb__after_clear_bit();
-		wake_up_bit(&bdi->state, BDI_pending);
 	}
 
 	return 0;