Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-3.6/drivers' of git://git.kernel.dk/linux-block

Pull block driver changes from Jens Axboe:

- Making the plugging support for drivers a bit more sane from Neil.
This supersedes the plugging change from Shaohua as well.

- The usual round of drbd updates.

- Using a tail add instead of a head add in the request completion for
nbd, making us find the most completed request more quickly.

- A few floppy changes, getting rid of a duplicated flag and also
running the floppy init async (since it takes forever in boot terms)
from Andi.

* 'for-3.6/drivers' of git://git.kernel.dk/linux-block:
floppy: remove duplicated flag FD_RAW_NEED_DISK
blk: pass from_schedule to non-request unplug functions.
block: stack unplug
blk: centralize non-request unplug handling.
md: remove plug_cnt feature of plugging.
block/nbd: micro-optimization in nbd request completion
drbd: announce FLUSH/FUA capability to upper layers
drbd: fix max_bio_size to be unsigned
drbd: flush drbd work queue before invalidate/invalidate remote
drbd: fix potential access after free
drbd: call local-io-error handler early
drbd: do not reset rs_pending_cnt too early
drbd: reset congestion information before reporting it in /proc/drbd
drbd: report congestion if we are waiting for some userland callback
drbd: differentiate between normal and forced detach
drbd: cleanup, remove two unused global flags
floppy: Run floppy initialization asynchronous

+238 -177
+34 -10
block/blk-core.c
··· 2909 2909 2910 2910 } 2911 2911 2912 - static void flush_plug_callbacks(struct blk_plug *plug) 2912 + static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) 2913 2913 { 2914 2914 LIST_HEAD(callbacks); 2915 2915 2916 - if (list_empty(&plug->cb_list)) 2917 - return; 2916 + while (!list_empty(&plug->cb_list)) { 2917 + list_splice_init(&plug->cb_list, &callbacks); 2918 2918 2919 - list_splice_init(&plug->cb_list, &callbacks); 2920 - 2921 - while (!list_empty(&callbacks)) { 2922 - struct blk_plug_cb *cb = list_first_entry(&callbacks, 2919 + while (!list_empty(&callbacks)) { 2920 + struct blk_plug_cb *cb = list_first_entry(&callbacks, 2923 2921 struct blk_plug_cb, 2924 2922 list); 2925 - list_del(&cb->list); 2926 - cb->callback(cb); 2923 + list_del(&cb->list); 2924 + cb->callback(cb, from_schedule); 2925 + } 2927 2926 } 2928 2927 } 2928 + 2929 + struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, 2930 + int size) 2931 + { 2932 + struct blk_plug *plug = current->plug; 2933 + struct blk_plug_cb *cb; 2934 + 2935 + if (!plug) 2936 + return NULL; 2937 + 2938 + list_for_each_entry(cb, &plug->cb_list, list) 2939 + if (cb->callback == unplug && cb->data == data) 2940 + return cb; 2941 + 2942 + /* Not currently on the callback list */ 2943 + BUG_ON(size < sizeof(*cb)); 2944 + cb = kzalloc(size, GFP_ATOMIC); 2945 + if (cb) { 2946 + cb->data = data; 2947 + cb->callback = unplug; 2948 + list_add(&cb->list, &plug->cb_list); 2949 + } 2950 + return cb; 2951 + } 2952 + EXPORT_SYMBOL(blk_check_plugged); 2929 2953 2930 2954 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) 2931 2955 { ··· 2961 2937 2962 2938 BUG_ON(plug->magic != PLUG_MAGIC); 2963 2939 2964 - flush_plug_callbacks(plug); 2940 + flush_plug_callbacks(plug, from_schedule); 2965 2941 if (list_empty(&plug->list)) 2966 2942 return; 2967 2943
+6 -2
drivers/block/drbd/drbd_actlog.c
··· 411 411 + mdev->ldev->md.al_offset + mdev->al_tr_pos; 412 412 413 413 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) 414 - drbd_chk_io_error(mdev, 1, true); 414 + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); 415 415 416 416 if (++mdev->al_tr_pos > 417 417 div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) ··· 876 876 unsigned int enr, count = 0; 877 877 struct lc_element *e; 878 878 879 - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { 879 + /* this should be an empty REQ_FLUSH */ 880 + if (size == 0) 881 + return 0; 882 + 883 + if (size < 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { 880 884 dev_err(DEV, "sector: %llus, size: %d\n", 881 885 (unsigned long long)sector, size); 882 886 return 0;
+2 -2
drivers/block/drbd/drbd_bitmap.c
··· 1096 1096 1097 1097 if (ctx->error) { 1098 1098 dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); 1099 - drbd_chk_io_error(mdev, 1, true); 1099 + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); 1100 1100 err = -EIO; /* ctx->error ? */ 1101 1101 } 1102 1102 ··· 1212 1212 wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); 1213 1213 1214 1214 if (ctx->error) 1215 - drbd_chk_io_error(mdev, 1, true); 1215 + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); 1216 1216 /* that should force detach, so the in memory bitmap will be 1217 1217 * gone in a moment as well. */ 1218 1218
+31 -13
drivers/block/drbd/drbd_int.h
··· 813 813 SIGNAL_ASENDER, /* whether asender wants to be interrupted */ 814 814 SEND_PING, /* whether asender should send a ping asap */ 815 815 816 - UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ 817 816 UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ 818 817 MD_DIRTY, /* current uuids and flags not yet on disk */ 819 818 DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ ··· 823 824 CRASHED_PRIMARY, /* This node was a crashed primary. 824 825 * Gets cleared when the state.conn 825 826 * goes into C_CONNECTED state. */ 826 - NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ 827 827 CONSIDER_RESYNC, 828 828 829 829 MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ ··· 832 834 BITMAP_IO_QUEUED, /* Started bitmap IO */ 833 835 GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ 834 836 WAS_IO_ERROR, /* Local disk failed returned IO error */ 837 + FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ 835 838 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ 836 839 NET_CONGESTED, /* The data socket is congested */ 837 840 ··· 850 851 AL_SUSPENDED, /* Activity logging is currently suspended. */ 851 852 AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ 852 853 STATE_SENT, /* Do not change state/UUIDs while this is set */ 854 + 855 + CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC) 856 + * pending, from drbd worker context. 857 + * If set, bdi_write_congested() returns true, 858 + * so shrink_page_list() would not recurse into, 859 + * and potentially deadlock on, this drbd worker. 
860 + */ 853 861 }; 854 862 855 863 struct drbd_bitmap; /* opaque for drbd_conf */ ··· 1136 1130 int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ 1137 1131 int rs_planed; /* resync sectors already planned */ 1138 1132 atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ 1139 - int peer_max_bio_size; 1140 - int local_max_bio_size; 1133 + unsigned int peer_max_bio_size; 1134 + unsigned int local_max_bio_size; 1141 1135 }; 1142 1136 1143 1137 static inline struct drbd_conf *minor_to_mdev(unsigned int minor) ··· 1441 1435 * hash table. */ 1442 1436 #define HT_SHIFT 8 1443 1437 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) 1444 - #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ 1438 + #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */ 1445 1439 1446 - #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ 1440 + #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* The old header only allows packets up to 32Kib data */ 1447 1441 1448 1442 /* Number of elements in the app_reads_hash */ 1449 1443 #define APP_R_HSIZE 15 ··· 1846 1840 return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); 1847 1841 } 1848 1842 1843 + enum drbd_force_detach_flags { 1844 + DRBD_IO_ERROR, 1845 + DRBD_META_IO_ERROR, 1846 + DRBD_FORCE_DETACH, 1847 + }; 1848 + 1849 1849 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) 1850 - static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) 1850 + static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, 1851 + enum drbd_force_detach_flags forcedetach, 1852 + const char *where) 1851 1853 { 1852 1854 switch (mdev->ldev->dc.on_io_error) { 1853 1855 case EP_PASS_ON: 1854 - if (!forcedetach) { 1856 + if (forcedetach == DRBD_IO_ERROR) { 1855 1857 if (__ratelimit(&drbd_ratelimit_state)) 1856 1858 dev_err(DEV, "Local IO failed in %s.\n", where); 1857 1859 if 
(mdev->state.disk > D_INCONSISTENT) ··· 1870 1856 case EP_DETACH: 1871 1857 case EP_CALL_HELPER: 1872 1858 set_bit(WAS_IO_ERROR, &mdev->flags); 1859 + if (forcedetach == DRBD_FORCE_DETACH) 1860 + set_bit(FORCE_DETACH, &mdev->flags); 1873 1861 if (mdev->state.disk > D_FAILED) { 1874 1862 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); 1875 1863 dev_err(DEV, ··· 1891 1875 */ 1892 1876 #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) 1893 1877 static inline void drbd_chk_io_error_(struct drbd_conf *mdev, 1894 - int error, int forcedetach, const char *where) 1878 + int error, enum drbd_force_detach_flags forcedetach, const char *where) 1895 1879 { 1896 1880 if (error) { 1897 1881 unsigned long flags; ··· 2421 2405 int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); 2422 2406 2423 2407 D_ASSERT(ap_bio >= 0); 2408 + 2409 + if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { 2410 + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) 2411 + drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); 2412 + } 2413 + 2424 2414 /* this currently does wake_up for every dec_ap_bio! 2425 2415 * maybe rather introduce some type of hysteresis? 2426 2416 * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ 2427 2417 if (ap_bio < mxb) 2428 2418 wake_up(&mdev->misc_wait); 2429 - if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { 2430 - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) 2431 - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); 2432 - } 2433 2419 } 2434 2420 2435 2421 static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
+49 -16
drivers/block/drbd/drbd_main.c
··· 1514 1514 1515 1515 /* Do not change the order of the if above and the two below... */ 1516 1516 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ 1517 + /* we probably will start a resync soon. 1518 + * make sure those things are properly reset. */ 1519 + mdev->rs_total = 0; 1520 + mdev->rs_failed = 0; 1521 + atomic_set(&mdev->rs_pending_cnt, 0); 1522 + drbd_rs_cancel_all(mdev); 1523 + 1517 1524 drbd_send_uuids(mdev); 1518 1525 drbd_send_state(mdev, ns); 1519 1526 } ··· 1637 1630 eh = mdev->ldev->dc.on_io_error; 1638 1631 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); 1639 1632 1640 - /* Immediately allow completion of all application IO, that waits 1641 - for completion from the local disk. */ 1642 - tl_abort_disk_io(mdev); 1633 + if (was_io_error && eh == EP_CALL_HELPER) 1634 + drbd_khelper(mdev, "local-io-error"); 1635 + 1636 + /* Immediately allow completion of all application IO, 1637 + * that waits for completion from the local disk, 1638 + * if this was a force-detach due to disk_timeout 1639 + * or administrator request (drbdsetup detach --force). 1640 + * Do NOT abort otherwise. 1641 + * Aborting local requests may cause serious problems, 1642 + * if requests are completed to upper layers already, 1643 + * and then later the already submitted local bio completes. 1644 + * This can cause DMA into former bio pages that meanwhile 1645 + * have been re-used for other things. 1646 + * So aborting local requests may cause crashes, 1647 + * or even worse, silent data corruption. 
1648 + */ 1649 + if (test_and_clear_bit(FORCE_DETACH, &mdev->flags)) 1650 + tl_abort_disk_io(mdev); 1643 1651 1644 1652 /* current state still has to be D_FAILED, 1645 1653 * there is only one way out: to D_DISKLESS, ··· 1675 1653 drbd_md_sync(mdev); 1676 1654 } 1677 1655 put_ldev(mdev); 1678 - 1679 - if (was_io_error && eh == EP_CALL_HELPER) 1680 - drbd_khelper(mdev, "local-io-error"); 1681 1656 } 1682 1657 1683 1658 /* second half of local IO error, failure to attach, ··· 1687 1668 dev_err(DEV, 1688 1669 "ASSERT FAILED: disk is %s while going diskless\n", 1689 1670 drbd_disk_str(mdev->state.disk)); 1690 - 1691 - mdev->rs_total = 0; 1692 - mdev->rs_failed = 0; 1693 - atomic_set(&mdev->rs_pending_cnt, 0); 1694 1671 1695 1672 if (ns.conn >= C_CONNECTED) 1696 1673 drbd_send_state(mdev, ns); ··· 2209 2194 { 2210 2195 struct p_sizes p; 2211 2196 sector_t d_size, u_size; 2212 - int q_order_type, max_bio_size; 2197 + int q_order_type; 2198 + unsigned int max_bio_size; 2213 2199 int ok; 2214 2200 2215 2201 if (get_ldev_if_state(mdev, D_NEGOTIATING)) { ··· 2219 2203 u_size = mdev->ldev->dc.disk_size; 2220 2204 q_order_type = drbd_queue_order_type(mdev); 2221 2205 max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; 2222 - max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE); 2206 + max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE); 2223 2207 put_ldev(mdev); 2224 2208 } else { 2225 2209 d_size = 0; ··· 2230 2214 2231 2215 /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */ 2232 2216 if (mdev->agreed_pro_version <= 94) 2233 - max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 2217 + max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 2234 2218 2235 2219 p.d_size = cpu_to_be64(d_size); 2236 2220 p.u_size = cpu_to_be64(u_size); ··· 3557 3541 goto out; 3558 3542 } 3559 3543 3544 + if (test_bit(CALLBACK_PENDING, &mdev->flags)) { 3545 + r |= (1 << BDI_async_congested); 3546 + /* Without good local data, we 
would need to read from remote, 3547 + * and that would need the worker thread as well, which is 3548 + * currently blocked waiting for that usermode helper to 3549 + * finish. 3550 + */ 3551 + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) 3552 + r |= (1 << BDI_sync_congested); 3553 + else 3554 + put_ldev(mdev); 3555 + r &= bdi_bits; 3556 + reason = 'c'; 3557 + goto out; 3558 + } 3559 + 3560 3560 if (get_ldev(mdev)) { 3561 3561 q = bdev_get_queue(mdev->ldev->backing_bdev); 3562 3562 r = bdi_congested(&q->backing_dev_info, bdi_bits); ··· 3636 3604 q->backing_dev_info.congested_data = mdev; 3637 3605 3638 3606 blk_queue_make_request(q, drbd_make_request); 3607 + blk_queue_flush(q, REQ_FLUSH | REQ_FUA); 3639 3608 /* Setting the max_hw_sectors to an odd value of 8kibyte here 3640 3609 This triggers a max_bio_size message upon first attach or connect */ 3641 3610 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); ··· 3903 3870 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { 3904 3871 /* this was a try anyways ... */ 3905 3872 dev_err(DEV, "meta data update failed!\n"); 3906 - drbd_chk_io_error(mdev, 1, true); 3873 + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); 3907 3874 } 3908 3875 3909 3876 /* Update mdev->ldev->md.la_size_sect, ··· 3983 3950 3984 3951 spin_lock_irq(&mdev->req_lock); 3985 3952 if (mdev->state.conn < C_CONNECTED) { 3986 - int peer; 3953 + unsigned int peer; 3987 3954 peer = be32_to_cpu(buffer->la_peer_max_bio_size); 3988 - peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); 3955 + peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE); 3989 3956 mdev->peer_max_bio_size = peer; 3990 3957 } 3991 3958 spin_unlock_irq(&mdev->req_lock);
+28 -8
drivers/block/drbd/drbd_nl.c
··· 147 147 char *argv[] = {usermode_helper, cmd, mb, NULL }; 148 148 int ret; 149 149 150 + if (current == mdev->worker.task) 151 + set_bit(CALLBACK_PENDING, &mdev->flags); 152 + 150 153 snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); 151 154 152 155 if (get_net_conf(mdev)) { ··· 191 188 dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", 192 189 usermode_helper, cmd, mb, 193 190 (ret >> 8) & 0xff, ret); 191 + 192 + if (current == mdev->worker.task) 193 + clear_bit(CALLBACK_PENDING, &mdev->flags); 194 194 195 195 if (ret < 0) /* Ignore any ERRNOs we got. */ 196 196 ret = 0; ··· 801 795 static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) 802 796 { 803 797 struct request_queue * const q = mdev->rq_queue; 804 - int max_hw_sectors = max_bio_size >> 9; 805 - int max_segments = 0; 798 + unsigned int max_hw_sectors = max_bio_size >> 9; 799 + unsigned int max_segments = 0; 806 800 807 801 if (get_ldev_if_state(mdev, D_ATTACHING)) { 808 802 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; ··· 835 829 836 830 void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) 837 831 { 838 - int now, new, local, peer; 832 + unsigned int now, new, local, peer; 839 833 840 834 now = queue_max_hw_sectors(mdev->rq_queue) << 9; 841 835 local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */ ··· 846 840 mdev->local_max_bio_size = local; 847 841 put_ldev(mdev); 848 842 } 843 + local = min(local, DRBD_MAX_BIO_SIZE); 849 844 850 845 /* We may ignore peer limits if the peer is modern enough. 
851 846 Because new from 8.3.8 onwards the peer can use multiple 852 847 BIOs for a single peer_request */ 853 848 if (mdev->state.conn >= C_CONNECTED) { 854 849 if (mdev->agreed_pro_version < 94) { 855 - peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 850 + peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 856 851 /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ 857 852 } else if (mdev->agreed_pro_version == 94) 858 853 peer = DRBD_MAX_SIZE_H80_PACKET; ··· 861 854 peer = DRBD_MAX_BIO_SIZE; 862 855 } 863 856 864 - new = min_t(int, local, peer); 857 + new = min(local, peer); 865 858 866 859 if (mdev->state.role == R_PRIMARY && new < now) 867 - dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now); 860 + dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now); 868 861 869 862 if (new != now) 870 863 dev_info(DEV, "max BIO size = %u\n", new); ··· 956 949 * e.g. if someone calls attach from the on-io-error handler, 957 950 * to realize a "hot spare" feature (not that I'd recommend that) */ 958 951 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); 952 + 953 + /* make sure there is no leftover from previous force-detach attempts */ 954 + clear_bit(FORCE_DETACH, &mdev->flags); 955 + 956 + /* and no leftover from previously aborted resync or verify, either */ 957 + mdev->rs_total = 0; 958 + mdev->rs_failed = 0; 959 + atomic_set(&mdev->rs_pending_cnt, 0); 959 960 960 961 /* allocation not in the IO path, cqueue thread context */ 961 962 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); ··· 1360 1345 } 1361 1346 1362 1347 if (dt.detach_force) { 1348 + set_bit(FORCE_DETACH, &mdev->flags); 1363 1349 drbd_force_state(mdev, NS(disk, D_FAILED)); 1364 1350 reply->ret_code = SS_SUCCESS; 1365 1351 goto out; ··· 1978 1962 int retcode; 1979 1963 1980 1964 /* If there is still bitmap IO pending, probably because of a previous 1981 - * resync just being finished, wait for it before 
requesting a new resync. */ 1965 + * resync just being finished, wait for it before requesting a new resync. 1966 + * Also wait for it's after_state_ch(). */ 1982 1967 drbd_suspend_io(mdev); 1983 1968 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); 1969 + drbd_flush_workqueue(mdev); 1984 1970 1985 1971 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); 1986 1972 ··· 2021 2003 int retcode; 2022 2004 2023 2005 /* If there is still bitmap IO pending, probably because of a previous 2024 - * resync just being finished, wait for it before requesting a new resync. */ 2006 + * resync just being finished, wait for it before requesting a new resync. 2007 + * Also wait for it's after_state_ch(). */ 2025 2008 drbd_suspend_io(mdev); 2026 2009 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); 2010 + drbd_flush_workqueue(mdev); 2027 2011 2028 2012 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); 2029 2013
+3
drivers/block/drbd/drbd_proc.c
··· 245 245 mdev->state.role == R_SECONDARY) { 246 246 seq_printf(seq, "%2d: cs:Unconfigured\n", i); 247 247 } else { 248 + /* reset mdev->congestion_reason */ 249 + bdi_rw_congested(&mdev->rq_queue->backing_dev_info); 250 + 248 251 seq_printf(seq, 249 252 "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n" 250 253 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
+28 -10
drivers/block/drbd/drbd_receiver.c
··· 277 277 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; 278 278 int i; 279 279 280 + if (page == NULL) 281 + return; 282 + 280 283 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) 281 284 i = page_chain_free(page); 282 285 else { ··· 319 316 gfp_t gfp_mask) __must_hold(local) 320 317 { 321 318 struct drbd_epoch_entry *e; 322 - struct page *page; 319 + struct page *page = NULL; 323 320 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 324 321 325 322 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) ··· 332 329 return NULL; 333 330 } 334 331 335 - page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); 336 - if (!page) 337 - goto fail; 332 + if (data_size) { 333 + page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); 334 + if (!page) 335 + goto fail; 336 + } 338 337 339 338 INIT_HLIST_NODE(&e->collision); 340 339 e->epoch = NULL; ··· 1275 1270 1276 1271 data_size -= dgs; 1277 1272 1278 - ERR_IF(data_size == 0) return NULL; 1279 1273 ERR_IF(data_size & 0x1ff) return NULL; 1280 1274 ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; 1281 1275 ··· 1294 1290 e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); 1295 1291 if (!e) 1296 1292 return NULL; 1293 + 1294 + if (!data_size) 1295 + return e; 1297 1296 1298 1297 ds = data_size; 1299 1298 page = e->pages; ··· 1722 1715 1723 1716 dp_flags = be32_to_cpu(p->dp_flags); 1724 1717 rw |= wire_flags_to_bio(mdev, dp_flags); 1718 + if (e->pages == NULL) { 1719 + D_ASSERT(e->size == 0); 1720 + D_ASSERT(dp_flags & DP_FLUSH); 1721 + } 1725 1722 1726 1723 if (dp_flags & DP_MAY_SET_IN_SYNC) 1727 1724 e->flags |= EE_MAY_SET_IN_SYNC; ··· 3812 3801 mdev->ee_hash = NULL; 3813 3802 mdev->ee_hash_s = 0; 3814 3803 3815 - /* paranoia code */ 3816 - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) 3817 - if (h->first) 3818 - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", 3819 - (int)(h - mdev->tl_hash), h->first); 3804 + /* We may not have 
had the chance to wait for all locally pending 3805 + * application requests. The hlist_add_fake() prevents access after 3806 + * free on master bio completion. */ 3807 + for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) { 3808 + struct drbd_request *req; 3809 + struct hlist_node *pos, *n; 3810 + hlist_for_each_entry_safe(req, pos, n, h, collision) { 3811 + hlist_del_init(&req->collision); 3812 + hlist_add_fake(&req->collision); 3813 + } 3814 + } 3815 + 3820 3816 kfree(mdev->tl_hash); 3821 3817 mdev->tl_hash = NULL; 3822 3818 mdev->tl_hash_s = 0;
+4 -5
drivers/block/drbd/drbd_req.c
··· 455 455 req->rq_state |= RQ_LOCAL_COMPLETED; 456 456 req->rq_state &= ~RQ_LOCAL_PENDING; 457 457 458 - __drbd_chk_io_error(mdev, false); 458 + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); 459 459 _req_may_be_done_not_susp(req, m); 460 460 break; 461 461 ··· 477 477 break; 478 478 } 479 479 480 - __drbd_chk_io_error(mdev, false); 480 + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); 481 481 482 482 goto_queue_for_net_read: 483 483 ··· 1111 1111 /* 1112 1112 * what we "blindly" assume: 1113 1113 */ 1114 - D_ASSERT(bio->bi_size > 0); 1115 1114 D_ASSERT((bio->bi_size & 0x1ff) == 0); 1116 1115 1117 1116 /* to make some things easier, force alignment of requests within the 1118 1117 * granularity of our hash tables */ 1119 1118 s_enr = bio->bi_sector >> HT_SHIFT; 1120 - e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; 1119 + e_enr = bio->bi_size ? (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT : s_enr; 1121 1120 1122 1121 if (likely(s_enr == e_enr)) { 1123 1122 do { ··· 1274 1275 time_after(now, req->start_time + dt) && 1275 1276 !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { 1276 1277 dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); 1277 - __drbd_chk_io_error(mdev, 1); 1278 + __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH); 1278 1279 } 1279 1280 nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; 1280 1281 spin_unlock_irq(&mdev->req_lock);
+2 -10
drivers/block/drbd/drbd_worker.c
··· 111 111 if (list_empty(&mdev->read_ee)) 112 112 wake_up(&mdev->ee_wait); 113 113 if (test_bit(__EE_WAS_ERROR, &e->flags)) 114 - __drbd_chk_io_error(mdev, false); 114 + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); 115 115 spin_unlock_irqrestore(&mdev->req_lock, flags); 116 116 117 117 drbd_queue_work(&mdev->data.work, &e->w); ··· 154 154 : list_empty(&mdev->active_ee); 155 155 156 156 if (test_bit(__EE_WAS_ERROR, &e->flags)) 157 - __drbd_chk_io_error(mdev, false); 157 + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); 158 158 spin_unlock_irqrestore(&mdev->req_lock, flags); 159 159 160 160 if (is_syncer_req) ··· 1499 1499 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) { 1500 1500 dev_err(DEV, "Resync already running!\n"); 1501 1501 return; 1502 - } 1503 - 1504 - if (mdev->state.conn < C_AHEAD) { 1505 - /* In case a previous resync run was aborted by an IO error/detach on the peer. */ 1506 - drbd_rs_cancel_all(mdev); 1507 - /* This should be done when we abort the resync. We definitely do not 1508 - want to have this for connections going back and forth between 1509 - Ahead/Behind and SyncSource/SyncTarget */ 1510 1502 } 1511 1503 1512 1504 if (side == C_SYNC_TARGET) {
+21 -3
drivers/block/floppy.c
··· 191 191 #include <linux/mutex.h> 192 192 #include <linux/io.h> 193 193 #include <linux/uaccess.h> 194 + #include <linux/async.h> 194 195 195 196 /* 196 197 * PS/2 floppies have much slower step rates than regular floppies. ··· 2517 2516 set_fdc((long)current_req->rq_disk->private_data); 2518 2517 2519 2518 raw_cmd = &default_raw_cmd; 2520 - raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_DISK | 2521 - FD_RAW_NEED_SEEK; 2519 + raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK; 2522 2520 raw_cmd->cmd_count = NR_RW; 2523 2521 if (rq_data_dir(current_req) == READ) { 2524 2522 raw_cmd->flags |= FD_RAW_READ; ··· 4123 4123 return get_disk(disks[drive]); 4124 4124 } 4125 4125 4126 - static int __init floppy_init(void) 4126 + static int __init do_floppy_init(void) 4127 4127 { 4128 4128 int i, unit, drive; 4129 4129 int err, dr; ··· 4336 4336 put_disk(disks[dr]); 4337 4337 } 4338 4338 return err; 4339 + } 4340 + 4341 + #ifndef MODULE 4342 + static __init void floppy_async_init(void *data, async_cookie_t cookie) 4343 + { 4344 + do_floppy_init(); 4345 + } 4346 + #endif 4347 + 4348 + static int __init floppy_init(void) 4349 + { 4350 + #ifdef MODULE 4351 + return do_floppy_init(); 4352 + #else 4353 + /* Don't hold up the bootup by the floppy initialization */ 4354 + async_schedule(floppy_async_init, NULL); 4355 + return 0; 4356 + #endif 4339 4357 } 4340 4358 4341 4359 static const struct io_region {
+1 -1
drivers/block/nbd.c
··· 485 485 nbd_end_request(req); 486 486 } else { 487 487 spin_lock(&nbd->queue_lock); 488 - list_add(&req->queuelist, &nbd->queue_head); 488 + list_add_tail(&req->queuelist, &nbd->queue_head); 489 489 spin_unlock(&nbd->queue_lock); 490 490 } 491 491
+7 -30
drivers/block/umem.c
··· 513 513 } 514 514 } 515 515 516 - struct mm_plug_cb { 517 - struct blk_plug_cb cb; 518 - struct cardinfo *card; 519 - }; 520 - 521 - static void mm_unplug(struct blk_plug_cb *cb) 516 + static void mm_unplug(struct blk_plug_cb *cb, bool from_schedule) 522 517 { 523 - struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb); 518 + struct cardinfo *card = cb->data; 524 519 525 - spin_lock_irq(&mmcb->card->lock); 526 - activate(mmcb->card); 527 - spin_unlock_irq(&mmcb->card->lock); 528 - kfree(mmcb); 520 + spin_lock_irq(&card->lock); 521 + activate(card); 522 + spin_unlock_irq(&card->lock); 523 + kfree(cb); 529 524 } 530 525 531 526 static int mm_check_plugged(struct cardinfo *card) 532 527 { 533 - struct blk_plug *plug = current->plug; 534 - struct mm_plug_cb *mmcb; 535 - 536 - if (!plug) 537 - return 0; 538 - 539 - list_for_each_entry(mmcb, &plug->cb_list, cb.list) { 540 - if (mmcb->cb.callback == mm_unplug && mmcb->card == card) 541 - return 1; 542 - } 543 - /* Not currently on the callback list */ 544 - mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC); 545 - if (!mmcb) 546 - return 0; 547 - 548 - mmcb->card = card; 549 - mmcb->cb.callback = mm_unplug; 550 - list_add(&mmcb->cb.list, &plug->cb_list); 551 - return 1; 528 + return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb)); 552 529 } 553 530 554 531 static void mm_make_request(struct request_queue *q, struct bio *bio)
+5 -54
drivers/md/md.c
··· 498 498 } 499 499 EXPORT_SYMBOL(md_flush_request); 500 500 501 - /* Support for plugging. 502 - * This mirrors the plugging support in request_queue, but does not 503 - * require having a whole queue or request structures. 504 - * We allocate an md_plug_cb for each md device and each thread it gets 505 - * plugged on. This links tot the private plug_handle structure in the 506 - * personality data where we keep a count of the number of outstanding 507 - * plugs so other code can see if a plug is active. 508 - */ 509 - struct md_plug_cb { 510 - struct blk_plug_cb cb; 511 - struct mddev *mddev; 512 - }; 513 - 514 - static void plugger_unplug(struct blk_plug_cb *cb) 501 + void md_unplug(struct blk_plug_cb *cb, bool from_schedule) 515 502 { 516 - struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb); 517 - if (atomic_dec_and_test(&mdcb->mddev->plug_cnt)) 518 - md_wakeup_thread(mdcb->mddev->thread); 519 - kfree(mdcb); 503 + struct mddev *mddev = cb->data; 504 + md_wakeup_thread(mddev->thread); 505 + kfree(cb); 520 506 } 521 - 522 - /* Check that an unplug wakeup will come shortly. 
523 - * If not, wakeup the md thread immediately 524 - */ 525 - int mddev_check_plugged(struct mddev *mddev) 526 - { 527 - struct blk_plug *plug = current->plug; 528 - struct md_plug_cb *mdcb; 529 - 530 - if (!plug) 531 - return 0; 532 - 533 - list_for_each_entry(mdcb, &plug->cb_list, cb.list) { 534 - if (mdcb->cb.callback == plugger_unplug && 535 - mdcb->mddev == mddev) { 536 - /* Already on the list, move to top */ 537 - if (mdcb != list_first_entry(&plug->cb_list, 538 - struct md_plug_cb, 539 - cb.list)) 540 - list_move(&mdcb->cb.list, &plug->cb_list); 541 - return 1; 542 - } 543 - } 544 - /* Not currently on the callback list */ 545 - mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC); 546 - if (!mdcb) 547 - return 0; 548 - 549 - mdcb->mddev = mddev; 550 - mdcb->cb.callback = plugger_unplug; 551 - atomic_inc(&mddev->plug_cnt); 552 - list_add(&mdcb->cb.list, &plug->cb_list); 553 - return 1; 554 - } 555 - EXPORT_SYMBOL_GPL(mddev_check_plugged); 507 + EXPORT_SYMBOL(md_unplug); 556 508 557 509 static inline struct mddev *mddev_get(struct mddev *mddev) 558 510 { ··· 554 602 atomic_set(&mddev->active, 1); 555 603 atomic_set(&mddev->openers, 0); 556 604 atomic_set(&mddev->active_io, 0); 557 - atomic_set(&mddev->plug_cnt, 0); 558 605 spin_lock_init(&mddev->write_lock); 559 606 atomic_set(&mddev->flush_pending, 0); 560 607 init_waitqueue_head(&mddev->sb_wait);
+7 -4
drivers/md/md.h
··· 266 266 int new_chunk_sectors; 267 267 int reshape_backwards; 268 268 269 - atomic_t plug_cnt; /* If device is expecting 270 - * more bios soon. 271 - */ 272 269 struct md_thread *thread; /* management thread */ 273 270 struct md_thread *sync_thread; /* doing resync or reconstruct */ 274 271 sector_t curr_resync; /* last block scheduled */ ··· 627 630 struct mddev *mddev); 628 631 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, 629 632 struct mddev *mddev); 630 - extern int mddev_check_plugged(struct mddev *mddev); 631 633 extern void md_trim_bio(struct bio *bio, int offset, int size); 634 + 635 + extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); 636 + static inline int mddev_check_plugged(struct mddev *mddev) 637 + { 638 + return !!blk_check_plugged(md_unplug, mddev, 639 + sizeof(struct blk_plug_cb)); 640 + } 632 641 #endif /* _MD_MD_H */
+1 -2
drivers/md/raid1.c
··· 2247 2247 blk_start_plug(&plug); 2248 2248 for (;;) { 2249 2249 2250 - if (atomic_read(&mddev->plug_cnt) == 0) 2251 - flush_pending_writes(conf); 2250 + flush_pending_writes(conf); 2252 2251 2253 2252 spin_lock_irqsave(&conf->device_lock, flags); 2254 2253 if (list_empty(head)) {
+1 -2
drivers/md/raid10.c
··· 2680 2680 blk_start_plug(&plug); 2681 2681 for (;;) { 2682 2682 2683 - if (atomic_read(&mddev->plug_cnt) == 0) 2684 - flush_pending_writes(conf); 2683 + flush_pending_writes(conf); 2685 2684 2686 2685 spin_lock_irqsave(&conf->device_lock, flags); 2687 2686 if (list_empty(head)) {
+2 -3
drivers/md/raid5.c
··· 4562 4562 while (1) { 4563 4563 struct bio *bio; 4564 4564 4565 - if (atomic_read(&mddev->plug_cnt) == 0 && 4565 + if ( 4566 4566 !list_empty(&conf->bitmap_list)) { 4567 4567 /* Now is a good time to flush some bitmap updates */ 4568 4568 conf->seq_flush++; ··· 4572 4572 conf->seq_write = conf->seq_flush; 4573 4573 activate_bit_delay(conf); 4574 4574 } 4575 - if (atomic_read(&mddev->plug_cnt) == 0) 4576 - raid5_activate_delayed(conf); 4575 + raid5_activate_delayed(conf); 4577 4576 4578 4577 while ((bio = remove_bio_from_retry(conf))) { 4579 4578 int ok;
+6 -2
include/linux/blkdev.h
··· 922 922 }; 923 923 #define BLK_MAX_REQUEST_COUNT 16 924 924 925 + struct blk_plug_cb; 926 + typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); 925 927 struct blk_plug_cb { 926 928 struct list_head list; 927 - void (*callback)(struct blk_plug_cb *); 929 + blk_plug_cb_fn callback; 930 + void *data; 928 931 }; 929 - 932 + extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, 933 + void *data, int size); 930 934 extern void blk_start_plug(struct blk_plug *); 931 935 extern void blk_finish_plug(struct blk_plug *); 932 936 extern void blk_flush_plug_list(struct blk_plug *, bool);