Merge branch 'for-4.2/core' of git://git.kernel.dk/linux-block

+1

MAINTAINERS

··· 2075 2075 T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 2076 2076 S: Maintained 2077 2077 F: block/ 2078 + F: kernel/trace/blktrace.c 2078 2079 2079 2080 BLOCK2MTD DRIVER 2080 2081 M: Joern Engel <joern@lazybastard.org>

+2 -2

block/bio-integrity.c

··· 361 361 362 362 /* Restore original bio completion handler */ 363 363 bio->bi_end_io = bip->bip_end_io; 364 - bio_endio_nodec(bio, error); 364 + bio_endio(bio, error); 365 365 } 366 366 367 367 /** ··· 388 388 */ 389 389 if (error) { 390 390 bio->bi_end_io = bip->bip_end_io; 391 - bio_endio_nodec(bio, error); 391 + bio_endio(bio, error); 392 392 393 393 return; 394 394 }

+47 -30

block/bio.c

··· 270 270 { 271 271 memset(bio, 0, sizeof(*bio)); 272 272 bio->bi_flags = 1 << BIO_UPTODATE; 273 - atomic_set(&bio->bi_remaining, 1); 274 - atomic_set(&bio->bi_cnt, 1); 273 + atomic_set(&bio->__bi_remaining, 1); 274 + atomic_set(&bio->__bi_cnt, 1); 275 275 } 276 276 EXPORT_SYMBOL(bio_init); 277 277 ··· 292 292 __bio_free(bio); 293 293 294 294 memset(bio, 0, BIO_RESET_BYTES); 295 - bio->bi_flags = flags|(1 << BIO_UPTODATE); 296 - atomic_set(&bio->bi_remaining, 1); 295 + bio->bi_flags = flags | (1 << BIO_UPTODATE); 296 + atomic_set(&bio->__bi_remaining, 1); 297 297 } 298 298 EXPORT_SYMBOL(bio_reset); 299 299 ··· 301 301 { 302 302 bio_endio(bio->bi_private, error); 303 303 bio_put(bio); 304 + } 305 + 306 + /* 307 + * Increment chain count for the bio. Make sure the CHAIN flag update 308 + * is visible before the raised count. 309 + */ 310 + static inline void bio_inc_remaining(struct bio *bio) 311 + { 312 + bio->bi_flags |= (1 << BIO_CHAIN); 313 + smp_mb__before_atomic(); 314 + atomic_inc(&bio->__bi_remaining); 304 315 } 305 316 306 317 /** ··· 331 320 332 321 bio->bi_private = parent; 333 322 bio->bi_end_io = bio_chain_endio; 334 - atomic_inc(&parent->bi_remaining); 323 + bio_inc_remaining(parent); 335 324 } 336 325 EXPORT_SYMBOL(bio_chain); 337 326 ··· 535 524 **/ 536 525 void bio_put(struct bio *bio) 537 526 { 538 - BIO_BUG_ON(!atomic_read(&bio->bi_cnt)); 539 - 540 - /* 541 - * last put frees it 542 - */ 543 - if (atomic_dec_and_test(&bio->bi_cnt)) 527 + if (!bio_flagged(bio, BIO_REFFED)) 544 528 bio_free(bio); 529 + else { 530 + BIO_BUG_ON(!atomic_read(&bio->__bi_cnt)); 531 + 532 + /* 533 + * last put frees it 534 + */ 535 + if (atomic_dec_and_test(&bio->__bi_cnt)) 536 + bio_free(bio); 537 + } 545 538 } 546 539 EXPORT_SYMBOL(bio_put); 547 540 ··· 1756 1741 EXPORT_SYMBOL(bio_flush_dcache_pages); 1757 1742 #endif 1758 1743 1744 + static inline bool bio_remaining_done(struct bio *bio) 1745 + { 1746 + /* 1747 + * If we're not chaining, then ->__bi_remaining is always 1 and 1748 + * we always end io on the first invocation. 1749 + */ 1750 + if (!bio_flagged(bio, BIO_CHAIN)) 1751 + return true; 1752 + 1753 + BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); 1754 + 1755 + if (atomic_dec_and_test(&bio->__bi_remaining)) { 1756 + clear_bit(BIO_CHAIN, &bio->bi_flags); 1757 + return true; 1758 + } 1759 + 1760 + return false; 1761 + } 1762 + 1759 1763 /** 1760 1764 * bio_endio - end I/O on a bio 1761 1765 * @bio: bio ··· 1792 1758 void bio_endio(struct bio *bio, int error) 1793 1759 { 1794 1760 while (bio) { 1795 - BUG_ON(atomic_read(&bio->bi_remaining) <= 0); 1796 - 1797 1761 if (error) 1798 1762 clear_bit(BIO_UPTODATE, &bio->bi_flags); 1799 1763 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1800 1764 error = -EIO; 1801 1765 1802 - if (!atomic_dec_and_test(&bio->bi_remaining)) 1803 - return; 1766 + if (unlikely(!bio_remaining_done(bio))) 1767 + break; 1804 1768 1805 1769 /* 1806 1770 * Need to have a real endio function for chained bios, ··· 1820 1788 } 1821 1789 } 1822 1790 EXPORT_SYMBOL(bio_endio); 1823 - 1824 - /** 1825 - * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining 1826 - * @bio: bio 1827 - * @error: error, if any 1828 - * 1829 - * For code that has saved and restored bi_end_io; thing hard before using this 1830 - * function, probably you should've cloned the entire bio. 1831 - **/ 1832 - void bio_endio_nodec(struct bio *bio, int error) 1833 - { 1834 - atomic_inc(&bio->bi_remaining); 1835 - bio_endio(bio, error); 1836 - } 1837 - EXPORT_SYMBOL(bio_endio_nodec); 1838 1791 1839 1792 /** 1840 1793 * bio_split - split a bio

+81 -11

block/blk-cgroup.c

··· 9 9 * 10 10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> 11 11 * Nauman Rafique <nauman@google.com> 12 + * 13 + * For policy-specific per-blkcg data: 14 + * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> 15 + * Arianna Avanzini <avanzini.arianna@gmail.com> 12 16 */ 13 17 #include <linux/ioprio.h> 14 18 #include <linux/kdev_t.h> ··· 30 26 31 27 static DEFINE_MUTEX(blkcg_pol_mutex); 32 28 33 - struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, 34 - .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; 29 + struct blkcg blkcg_root; 35 30 EXPORT_SYMBOL_GPL(blkcg_root); 36 31 37 32 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; ··· 826 823 blkcg_css_alloc(struct cgroup_subsys_state *parent_css) 827 824 { 828 825 struct blkcg *blkcg; 826 + struct cgroup_subsys_state *ret; 827 + int i; 829 828 830 829 if (!parent_css) { 831 830 blkcg = &blkcg_root; ··· 835 830 } 836 831 837 832 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 838 - if (!blkcg) 839 - return ERR_PTR(-ENOMEM); 833 + if (!blkcg) { 834 + ret = ERR_PTR(-ENOMEM); 835 + goto free_blkcg; 836 + } 840 837 841 - blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; 842 - blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; 838 + for (i = 0; i < BLKCG_MAX_POLS ; i++) { 839 + struct blkcg_policy *pol = blkcg_policy[i]; 840 + struct blkcg_policy_data *cpd; 841 + 842 + /* 843 + * If the policy hasn't been attached yet, wait for it 844 + * to be attached before doing anything else. Otherwise, 845 + * check if the policy requires any specific per-cgroup 846 + * data: if it does, allocate and initialize it. 847 + */ 848 + if (!pol || !pol->cpd_size) 849 + continue; 850 + 851 + BUG_ON(blkcg->pd[i]); 852 + cpd = kzalloc(pol->cpd_size, GFP_KERNEL); 853 + if (!cpd) { 854 + ret = ERR_PTR(-ENOMEM); 855 + goto free_pd_blkcg; 856 + } 857 + blkcg->pd[i] = cpd; 858 + cpd->plid = i; 859 + pol->cpd_init_fn(blkcg); 860 + } 861 + 843 862 done: 844 863 spin_lock_init(&blkcg->lock); 845 864 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); 846 865 INIT_HLIST_HEAD(&blkcg->blkg_list); 847 866 848 867 return &blkcg->css; 868 + 869 + free_pd_blkcg: 870 + for (i--; i >= 0; i--) 871 + kfree(blkcg->pd[i]); 872 + 873 + free_blkcg: 874 + kfree(blkcg); 875 + return ret; 849 876 } 850 877 851 878 /** ··· 995 958 const struct blkcg_policy *pol) 996 959 { 997 960 LIST_HEAD(pds); 961 + LIST_HEAD(cpds); 998 962 struct blkcg_gq *blkg, *new_blkg; 999 - struct blkg_policy_data *pd, *n; 963 + struct blkg_policy_data *pd, *nd; 964 + struct blkcg_policy_data *cpd, *cnd; 1000 965 int cnt = 0, ret; 1001 966 bool preloaded; 1002 967 ··· 1042 1003 1043 1004 spin_unlock_irq(q->queue_lock); 1044 1005 1045 - /* allocate policy_data for all existing blkgs */ 1006 + /* 1007 + * Allocate per-blkg and per-blkcg policy data 1008 + * for all existing blkgs. 1009 + */ 1046 1010 while (cnt--) { 1047 1011 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); 1048 1012 if (!pd) { ··· 1053 1011 goto out_free; 1054 1012 } 1055 1013 list_add_tail(&pd->alloc_node, &pds); 1014 + 1015 + if (!pol->cpd_size) 1016 + continue; 1017 + cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node); 1018 + if (!cpd) { 1019 + ret = -ENOMEM; 1020 + goto out_free; 1021 + } 1022 + list_add_tail(&cpd->alloc_node, &cpds); 1056 1023 } 1057 1024 1058 1025 /* 1059 - * Install the allocated pds. With @q bypassing, no new blkg 1026 + * Install the allocated pds and cpds. With @q bypassing, no new blkg 1060 1027 * should have been created while the queue lock was dropped. 1061 1028 */ 1062 1029 spin_lock_irq(q->queue_lock); 1063 1030 1064 1031 list_for_each_entry(blkg, &q->blkg_list, q_node) { 1065 - if (WARN_ON(list_empty(&pds))) { 1032 + if (WARN_ON(list_empty(&pds)) || 1033 + WARN_ON(pol->cpd_size && list_empty(&cpds))) { 1066 1034 /* umm... this shouldn't happen, just abort */ 1067 1035 ret = -ENOMEM; 1068 1036 goto out_unlock; 1069 1037 } 1038 + cpd = list_first_entry(&cpds, struct blkcg_policy_data, 1039 + alloc_node); 1040 + list_del_init(&cpd->alloc_node); 1070 1041 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); 1071 1042 list_del_init(&pd->alloc_node); 1072 1043 1073 1044 /* grab blkcg lock too while installing @pd on @blkg */ 1074 1045 spin_lock(&blkg->blkcg->lock); 1075 1046 1047 + if (!pol->cpd_size) 1048 + goto no_cpd; 1049 + if (!blkg->blkcg->pd[pol->plid]) { 1050 + /* Per-policy per-blkcg data */ 1051 + blkg->blkcg->pd[pol->plid] = cpd; 1052 + cpd->plid = pol->plid; 1053 + pol->cpd_init_fn(blkg->blkcg); 1054 + } else { /* must free it as it has already been extracted */ 1055 + kfree(cpd); 1056 + } 1057 + no_cpd: 1076 1058 blkg->pd[pol->plid] = pd; 1077 1059 pd->blkg = blkg; 1078 1060 pd->plid = pol->plid; ··· 1111 1045 spin_unlock_irq(q->queue_lock); 1112 1046 out_free: 1113 1047 blk_queue_bypass_end(q); 1114 - list_for_each_entry_safe(pd, n, &pds, alloc_node) 1048 + list_for_each_entry_safe(pd, nd, &pds, alloc_node) 1115 1049 kfree(pd); 1050 + list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node) 1051 + kfree(cpd); 1116 1052 return ret; 1117 1053 } 1118 1054 EXPORT_SYMBOL_GPL(blkcg_activate_policy); ··· 1155 1087 1156 1088 kfree(blkg->pd[pol->plid]); 1157 1089 blkg->pd[pol->plid] = NULL; 1090 + kfree(blkg->blkcg->pd[pol->plid]); 1091 + blkg->blkcg->pd[pol->plid] = NULL; 1158 1092 1159 1093 spin_unlock(&blkg->blkcg->lock); 1160 1094 }

+32 -8

block/blk-cgroup.h

··· 23 23 /* Max limits for throttle policy */ 24 24 #define THROTL_IOPS_MAX UINT_MAX 25 25 26 - /* CFQ specific, out here for blkcg->cfq_weight */ 27 - #define CFQ_WEIGHT_MIN 10 28 - #define CFQ_WEIGHT_MAX 1000 29 - #define CFQ_WEIGHT_DEFAULT 500 30 - 31 26 #ifdef CONFIG_BLK_CGROUP 32 27 33 28 enum blkg_rwstat_type { ··· 45 50 struct blkcg_gq *blkg_hint; 46 51 struct hlist_head blkg_list; 47 52 48 - /* TODO: per-policy storage in blkcg */ 49 - unsigned int cfq_weight; /* belongs to cfq */ 50 - unsigned int cfq_leaf_weight; 53 + struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; 51 54 }; 52 55 53 56 struct blkg_stat { ··· 80 87 struct list_head alloc_node; 81 88 }; 82 89 90 + /* 91 + * Policies that need to keep per-blkcg data which is independent 92 + * from any request_queue associated to it must specify its size 93 + * with the cpd_size field of the blkcg_policy structure and 94 + * embed a blkcg_policy_data in it. blkcg core allocates 95 + * policy-specific per-blkcg structures lazily the first time 96 + * they are actually needed, so it handles them together with 97 + * blkgs. cpd_init() is invoked to let each policy handle 98 + * per-blkcg data. 99 + */ 100 + struct blkcg_policy_data { 101 + /* the policy id this per-policy data belongs to */ 102 + int plid; 103 + 104 + /* used during policy activation */ 105 + struct list_head alloc_node; 106 + }; 107 + 83 108 /* association between a blk cgroup and a request queue */ 84 109 struct blkcg_gq { 85 110 /* Pointer to the associated request_queue */ ··· 123 112 struct rcu_head rcu_head; 124 113 }; 125 114 115 + typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); 126 116 typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); 127 117 typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); 128 118 typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); ··· 134 122 int plid; 135 123 /* policy specific private data size */ 136 124 size_t pd_size; 125 + /* policy specific per-blkcg data size */ 126 + size_t cpd_size; 137 127 /* cgroup files for the policy */ 138 128 struct cftype *cftypes; 139 129 140 130 /* operations */ 131 + blkcg_pol_init_cpd_fn *cpd_init_fn; 141 132 blkcg_pol_init_pd_fn *pd_init_fn; 142 133 blkcg_pol_online_pd_fn *pd_online_fn; 143 134 blkcg_pol_offline_pd_fn *pd_offline_fn; ··· 231 216 struct blkcg_policy *pol) 232 217 { 233 218 return blkg ? blkg->pd[pol->plid] : NULL; 219 + } 220 + 221 + static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, 222 + struct blkcg_policy *pol) 223 + { 224 + return blkcg ? blkcg->pd[pol->plid] : NULL; 234 225 } 235 226 236 227 /** ··· 583 562 struct blkcg; 584 563 585 564 struct blkg_policy_data { 565 + }; 566 + 567 + struct blkcg_policy_data { 586 568 }; 587 569 588 570 struct blkcg_gq {

+36 -100

block/blk-core.c

··· 117 117 static void req_bio_endio(struct request *rq, struct bio *bio, 118 118 unsigned int nbytes, int error) 119 119 { 120 - if (error) 120 + if (error && !(rq->cmd_flags & REQ_CLONE)) 121 121 clear_bit(BIO_UPTODATE, &bio->bi_flags); 122 122 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 123 123 error = -EIO; ··· 128 128 bio_advance(bio, nbytes); 129 129 130 130 /* don't actually finish bio if it's part of flush sequence */ 131 - if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) 131 + if (bio->bi_iter.bi_size == 0 && 132 + !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE))) 132 133 bio_endio(bio, error); 133 134 } 134 135 ··· 286 285 q->request_fn(q); 287 286 q->request_fn_active--; 288 287 } 288 + EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); 289 289 290 290 /** 291 291 * __blk_run_queue - run a single device queue ··· 1527 1525 * Caller must ensure !blk_queue_nomerges(q) beforehand. 1528 1526 */ 1529 1527 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, 1530 - unsigned int *request_count) 1528 + unsigned int *request_count, 1529 + struct request **same_queue_rq) 1531 1530 { 1532 1531 struct blk_plug *plug; 1533 1532 struct request *rq; ··· 1548 1545 list_for_each_entry_reverse(rq, plug_list, queuelist) { 1549 1546 int el_ret; 1550 1547 1551 - if (rq->q == q) 1548 + if (rq->q == q) { 1552 1549 (*request_count)++; 1550 + /* 1551 + * Only blk-mq multiple hardware queues case checks the 1552 + * rq in the same queue, there should be only one such 1553 + * rq in a queue 1554 + **/ 1555 + if (same_queue_rq) 1556 + *same_queue_rq = rq; 1557 + } 1553 1558 1554 1559 if (rq->q != q || !blk_rq_merge_ok(rq, bio)) 1555 1560 continue; ··· 1622 1611 * any locks. 1623 1612 */ 1624 1613 if (!blk_queue_nomerges(q) && 1625 - blk_attempt_plug_merge(q, bio, &request_count)) 1614 + blk_attempt_plug_merge(q, bio, &request_count, NULL)) 1626 1615 return; 1627 1616 1628 1617 spin_lock_irq(q->queue_lock); ··· 1729 1718 bio->bi_rw, 1730 1719 (unsigned long long)bio_end_sector(bio), 1731 1720 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); 1732 - 1733 - set_bit(BIO_EOF, &bio->bi_flags); 1734 1721 } 1735 1722 1736 1723 #ifdef CONFIG_FAIL_MAKE_REQUEST ··· 2913 2904 } 2914 2905 EXPORT_SYMBOL_GPL(blk_lld_busy); 2915 2906 2916 - /** 2917 - * blk_rq_unprep_clone - Helper function to free all bios in a cloned request 2918 - * @rq: the clone request to be cleaned up 2919 - * 2920 - * Description: 2921 - * Free all bios in @rq for a cloned request. 2922 - */ 2923 - void blk_rq_unprep_clone(struct request *rq) 2924 - { 2925 - struct bio *bio; 2926 - 2927 - while ((bio = rq->bio) != NULL) { 2928 - rq->bio = bio->bi_next; 2929 - 2930 - bio_put(bio); 2931 - } 2932 - } 2933 - EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); 2934 - 2935 - /* 2936 - * Copy attributes of the original request to the clone request. 2937 - * The actual data parts (e.g. ->cmd, ->sense) are not copied. 2938 - */ 2939 - static void __blk_rq_prep_clone(struct request *dst, struct request *src) 2907 + void blk_rq_prep_clone(struct request *dst, struct request *src) 2940 2908 { 2941 2909 dst->cpu = src->cpu; 2942 - dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; 2910 + dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK); 2911 + dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE; 2943 2912 dst->cmd_type = src->cmd_type; 2944 2913 dst->__sector = blk_rq_pos(src); 2945 2914 dst->__data_len = blk_rq_bytes(src); 2946 2915 dst->nr_phys_segments = src->nr_phys_segments; 2947 2916 dst->ioprio = src->ioprio; 2948 2917 dst->extra_len = src->extra_len; 2949 - } 2950 - 2951 - /** 2952 - * blk_rq_prep_clone - Helper function to setup clone request 2953 - * @rq: the request to be setup 2954 - * @rq_src: original request to be cloned 2955 - * @bs: bio_set that bios for clone are allocated from 2956 - * @gfp_mask: memory allocation mask for bio 2957 - * @bio_ctr: setup function to be called for each clone bio. 2958 - * Returns %0 for success, non %0 for failure. 2959 - * @data: private data to be passed to @bio_ctr 2960 - * 2961 - * Description: 2962 - * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. 2963 - * The actual data parts of @rq_src (e.g. ->cmd, ->sense) 2964 - * are not copied, and copying such parts is the caller's responsibility. 2965 - * Also, pages which the original bios are pointing to are not copied 2966 - * and the cloned bios just point same pages. 2967 - * So cloned bios must be completed before original bios, which means 2968 - * the caller must complete @rq before @rq_src. 2969 - */ 2970 - int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 2971 - struct bio_set *bs, gfp_t gfp_mask, 2972 - int (*bio_ctr)(struct bio *, struct bio *, void *), 2973 - void *data) 2974 - { 2975 - struct bio *bio, *bio_src; 2976 - 2977 - if (!bs) 2978 - bs = fs_bio_set; 2979 - 2980 - __rq_for_each_bio(bio_src, rq_src) { 2981 - bio = bio_clone_fast(bio_src, gfp_mask, bs); 2982 - if (!bio) 2983 - goto free_and_out; 2984 - 2985 - if (bio_ctr && bio_ctr(bio, bio_src, data)) 2986 - goto free_and_out; 2987 - 2988 - if (rq->bio) { 2989 - rq->biotail->bi_next = bio; 2990 - rq->biotail = bio; 2991 - } else 2992 - rq->bio = rq->biotail = bio; 2993 - } 2994 - 2995 - __blk_rq_prep_clone(rq, rq_src); 2996 - 2997 - return 0; 2998 - 2999 - free_and_out: 3000 - if (bio) 3001 - bio_put(bio); 3002 - blk_rq_unprep_clone(rq); 3003 - 3004 - return -ENOMEM; 2918 + dst->bio = src->bio; 2919 + dst->biotail = src->biotail; 2920 + dst->cmd = src->cmd; 2921 + dst->cmd_len = src->cmd_len; 2922 + dst->sense = src->sense; 3005 2923 } 3006 2924 EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 3007 2925 ··· 2970 3034 { 2971 3035 struct task_struct *tsk = current; 2972 3036 3037 + /* 3038 + * If this is a nested plug, don't actually assign it. 3039 + */ 3040 + if (tsk->plug) 3041 + return; 3042 + 2973 3043 INIT_LIST_HEAD(&plug->list); 2974 3044 INIT_LIST_HEAD(&plug->mq_list); 2975 3045 INIT_LIST_HEAD(&plug->cb_list); 2976 - 2977 3046 /* 2978 - * If this is a nested plug, don't actually assign it. It will be 2979 - * flushed on its own. 3047 + * Store ordering should not be needed here, since a potential 3048 + * preempt will imply a full memory barrier 2980 3049 */ 2981 - if (!tsk->plug) { 2982 - /* 2983 - * Store ordering should not be needed here, since a potential 2984 - * preempt will imply a full memory barrier 2985 - */ 2986 - tsk->plug = plug; 2987 - } 3050 + tsk->plug = plug; 2988 3051 } 2989 3052 EXPORT_SYMBOL(blk_start_plug); 2990 3053 ··· 3130 3195 3131 3196 void blk_finish_plug(struct blk_plug *plug) 3132 3197 { 3198 + if (plug != current->plug) 3199 + return; 3133 3200 blk_flush_plug_list(plug, false); 3134 3201 3135 - if (plug == current->plug) 3136 - current->plug = NULL; 3202 + current->plug = NULL; 3137 3203 } 3138 3204 EXPORT_SYMBOL(blk_finish_plug); 3139 3205

-10

block/blk-exec.c

··· 53 53 rq_end_io_fn *done) 54 54 { 55 55 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 56 - bool is_pm_resume; 57 56 58 57 WARN_ON(irqs_disabled()); 59 58 WARN_ON(rq->cmd_type == REQ_TYPE_FS); ··· 69 70 return; 70 71 } 71 72 72 - /* 73 - * need to check this before __blk_run_queue(), because rq can 74 - * be freed before that returns. 75 - */ 76 - is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME; 77 - 78 73 spin_lock_irq(q->queue_lock); 79 74 80 75 if (unlikely(blk_queue_dying(q))) { ··· 81 88 82 89 __elv_add_request(q, rq, where); 83 90 __blk_run_queue(q); 84 - /* the queue is stopped so it won't be run */ 85 - if (is_pm_resume) 86 - __blk_run_queue_uncond(q); 87 91 spin_unlock_irq(q->queue_lock); 88 92 } 89 93 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);

+2 -1

block/blk-merge.c

··· 589 589 !blk_write_same_mergeable(rq->bio, bio)) 590 590 return false; 591 591 592 - if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { 592 + /* Only check gaps if the bio carries data */ 593 + if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) && bio_has_data(bio)) { 593 594 struct bio_vec *bprev; 594 595 595 596 bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];

+38

block/blk-mq-tag.c

··· 438 438 } 439 439 } 440 440 441 + static void bt_tags_for_each(struct blk_mq_tags *tags, 442 + struct blk_mq_bitmap_tags *bt, unsigned int off, 443 + busy_tag_iter_fn *fn, void *data, bool reserved) 444 + { 445 + struct request *rq; 446 + int bit, i; 447 + 448 + if (!tags->rqs) 449 + return; 450 + for (i = 0; i < bt->map_nr; i++) { 451 + struct blk_align_bitmap *bm = &bt->map[i]; 452 + 453 + for (bit = find_first_bit(&bm->word, bm->depth); 454 + bit < bm->depth; 455 + bit = find_next_bit(&bm->word, bm->depth, bit + 1)) { 456 + rq = blk_mq_tag_to_rq(tags, off + bit); 457 + fn(rq, data, reserved); 458 + } 459 + 460 + off += (1 << bt->bits_per_word); 461 + } 462 + } 463 + 464 + void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, 465 + void *priv) 466 + { 467 + if (tags->nr_reserved_tags) 468 + bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true); 469 + bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, 470 + false); 471 + } 472 + EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); 473 + 441 474 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, 442 475 void *priv) 443 476 { ··· 612 579 tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node); 613 580 if (!tags) 614 581 return NULL; 582 + 583 + if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) { 584 + kfree(tags); 585 + return NULL; 586 + } 615 587 616 588 tags->nr_tags = total_tags; 617 589 tags->nr_reserved_tags = reserved_tags;

+1

block/blk-mq-tag.h

··· 44 44 struct list_head page_list; 45 45 46 46 int alloc_policy; 47 + cpumask_var_t cpumask; 47 48 }; 48 49 49 50

+99 -61

block/blk-mq.c

··· 89 89 return -EBUSY; 90 90 91 91 ret = wait_event_interruptible(q->mq_freeze_wq, 92 - !q->mq_freeze_depth || blk_queue_dying(q)); 92 + !atomic_read(&q->mq_freeze_depth) || 93 + blk_queue_dying(q)); 93 94 if (blk_queue_dying(q)) 94 95 return -ENODEV; 95 96 if (ret) ··· 113 112 114 113 void blk_mq_freeze_queue_start(struct request_queue *q) 115 114 { 116 - bool freeze; 115 + int freeze_depth; 117 116 118 - spin_lock_irq(q->queue_lock); 119 - freeze = !q->mq_freeze_depth++; 120 - spin_unlock_irq(q->queue_lock); 121 - 122 - if (freeze) { 117 + freeze_depth = atomic_inc_return(&q->mq_freeze_depth); 118 + if (freeze_depth == 1) { 123 119 percpu_ref_kill(&q->mq_usage_counter); 124 120 blk_mq_run_hw_queues(q, false); 125 121 } ··· 141 143 142 144 void blk_mq_unfreeze_queue(struct request_queue *q) 143 145 { 144 - bool wake; 146 + int freeze_depth; 145 147 146 - spin_lock_irq(q->queue_lock); 147 - wake = !--q->mq_freeze_depth; 148 - WARN_ON_ONCE(q->mq_freeze_depth < 0); 149 - spin_unlock_irq(q->queue_lock); 150 - if (wake) { 148 + freeze_depth = atomic_dec_return(&q->mq_freeze_depth); 149 + WARN_ON_ONCE(freeze_depth < 0); 150 + if (!freeze_depth) { 151 151 percpu_ref_reinit(&q->mq_usage_counter); 152 152 wake_up_all(&q->mq_freeze_wq); 153 153 } ··· 1233 1237 return rq; 1234 1238 } 1235 1239 1240 + static int blk_mq_direct_issue_request(struct request *rq) 1241 + { 1242 + int ret; 1243 + struct request_queue *q = rq->q; 1244 + struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, 1245 + rq->mq_ctx->cpu); 1246 + struct blk_mq_queue_data bd = { 1247 + .rq = rq, 1248 + .list = NULL, 1249 + .last = 1 1250 + }; 1251 + 1252 + /* 1253 + * For OK queue, we are done. For error, kill it. Any other 1254 + * error (busy), just add it to our list as we previously 1255 + * would have done 1256 + */ 1257 + ret = q->mq_ops->queue_rq(hctx, &bd); 1258 + if (ret == BLK_MQ_RQ_QUEUE_OK) 1259 + return 0; 1260 + else { 1261 + __blk_mq_requeue_request(rq); 1262 + 1263 + if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1264 + rq->errors = -EIO; 1265 + blk_mq_end_request(rq, rq->errors); 1266 + return 0; 1267 + } 1268 + return -1; 1269 + } 1270 + } 1271 + 1236 1272 /* 1237 1273 * Multiple hardware queue variant. This will not use per-process plugs, 1238 1274 * but will attempt to bypass the hctx queueing if we can go straight to ··· 1276 1248 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1277 1249 struct blk_map_ctx data; 1278 1250 struct request *rq; 1251 + unsigned int request_count = 0; 1252 + struct blk_plug *plug; 1253 + struct request *same_queue_rq = NULL; 1279 1254 1280 1255 blk_queue_bounce(q, &bio); 1281 1256 ··· 1286 1255 bio_endio(bio, -EIO); 1287 1256 return; 1288 1257 } 1258 + 1259 + if (!is_flush_fua && !blk_queue_nomerges(q) && 1260 + blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) 1261 + return; 1289 1262 1290 1263 rq = blk_mq_map_request(q, bio, &data); 1291 1264 if (unlikely(!rq)) ··· 1301 1266 goto run_queue; 1302 1267 } 1303 1268 1269 + plug = current->plug; 1304 1270 /* 1305 1271 * If the driver supports defer issued based on 'last', then 1306 1272 * queue it up like normal since we can potentially save some 1307 1273 * CPU this way. 1308 1274 */ 1309 - if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { 1310 - struct blk_mq_queue_data bd = { 1311 - .rq = rq, 1312 - .list = NULL, 1313 - .last = 1 1314 - }; 1315 - int ret; 1275 + if (((plug && !blk_queue_nomerges(q)) || is_sync) && 1276 + !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { 1277 + struct request *old_rq = NULL; 1316 1278 1317 1279 blk_mq_bio_to_request(rq, bio); 1318 1280 1319 1281 /* 1320 - * For OK queue, we are done. For error, kill it. Any other 1321 - * error (busy), just add it to our list as we previously 1322 - * would have done 1282 + * we do limited pluging. If bio can be merged, do merge. 1283 + * Otherwise the existing request in the plug list will be 1284 + * issued. So the plug list will have one request at most 1323 1285 */ 1324 - ret = q->mq_ops->queue_rq(data.hctx, &bd); 1325 - if (ret == BLK_MQ_RQ_QUEUE_OK) 1326 - goto done; 1327 - else { 1328 - __blk_mq_requeue_request(rq); 1329 - 1330 - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1331 - rq->errors = -EIO; 1332 - blk_mq_end_request(rq, rq->errors); 1333 - goto done; 1286 + if (plug) { 1287 + /* 1288 + * The plug list might get flushed before this. If that 1289 + * happens, same_queue_rq is invalid and plug list is empty 1290 + **/ 1291 + if (same_queue_rq && !list_empty(&plug->mq_list)) { 1292 + old_rq = same_queue_rq; 1293 + list_del_init(&old_rq->queuelist); 1334 1294 } 1335 - } 1295 + list_add_tail(&rq->queuelist, &plug->mq_list); 1296 + } else /* is_sync */ 1297 + old_rq = rq; 1298 + blk_mq_put_ctx(data.ctx); 1299 + if (!old_rq) 1300 + return; 1301 + if (!blk_mq_direct_issue_request(old_rq)) 1302 + return; 1303 + blk_mq_insert_request(old_rq, false, true, true); 1304 + return; 1336 1305 } 1337 1306 1338 1307 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { ··· 1349 1310 run_queue: 1350 1311 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); 1351 1312 } 1352 - done: 1353 1313 blk_mq_put_ctx(data.ctx); 1354 1314 } 1355 1315 ··· 1360 1322 { 1361 1323 const int is_sync = rw_is_sync(bio->bi_rw); 1362 1324 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1363 - unsigned int use_plug, request_count = 0; 1325 + struct blk_plug *plug; 1326 + unsigned int request_count = 0; 1364 1327 struct blk_map_ctx data; 1365 1328 struct request *rq; 1366 - 1367 - /* 1368 - * If we have multiple hardware queues, just go directly to 1369 - * one of those for sync IO. 1370 - */ 1371 - use_plug = !is_flush_fua && !is_sync; 1372 1329 1373 1330 blk_queue_bounce(q, &bio); 1374 1331 ··· 1372 1339 return; 1373 1340 } 1374 1341 1375 - if (use_plug && !blk_queue_nomerges(q) && 1376 - blk_attempt_plug_merge(q, bio, &request_count)) 1342 + if (!is_flush_fua && !blk_queue_nomerges(q) && 1343 + blk_attempt_plug_merge(q, bio, &request_count, NULL)) 1377 1344 return; 1378 1345 1379 1346 rq = blk_mq_map_request(q, bio, &data); ··· 1391 1358 * utilize that to temporarily store requests until the task is 1392 1359 * either done or scheduled away. 1393 1360 */ 1394 - if (use_plug) { 1395 - struct blk_plug *plug = current->plug; 1396 - 1397 - if (plug) { 1398 - blk_mq_bio_to_request(rq, bio); 1399 - if (list_empty(&plug->mq_list)) 1400 - trace_block_plug(q); 1401 - else if (request_count >= BLK_MAX_REQUEST_COUNT) { 1402 - blk_flush_plug_list(plug, false); 1403 - trace_block_plug(q); 1404 - } 1405 - list_add_tail(&rq->queuelist, &plug->mq_list); 1406 - blk_mq_put_ctx(data.ctx); 1407 - return; 1361 + plug = current->plug; 1362 + if (plug) { 1363 + blk_mq_bio_to_request(rq, bio); 1364 + if (list_empty(&plug->mq_list)) 1365 + trace_block_plug(q); 1366 + else if (request_count >= BLK_MAX_REQUEST_COUNT) { 1367 + blk_flush_plug_list(plug, false); 1368 + trace_block_plug(q); 1408 1369 } 1370 + list_add_tail(&rq->queuelist, &plug->mq_list); 1371 + blk_mq_put_ctx(data.ctx); 1372 + return; 1409 1373 } 1410 1374 1411 1375 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { ··· 1538 1508 i++; 1539 1509 } 1540 1510 } 1541 - 1542 1511 return tags; 1543 1512 1544 1513 fail: ··· 1821 1792 1822 1793 hctx = q->mq_ops->map_queue(q, i); 1823 1794 cpumask_set_cpu(i, hctx->cpumask); 1795 + cpumask_set_cpu(i, hctx->tags->cpumask); 1824 1796 ctx->index_hw = hctx->nr_ctx; 1825 1797 hctx->ctxs[hctx->nr_ctx++] = ctx; 1826 1798 } ··· 2086 2056 /* Basically redo blk_mq_init_queue with queue frozen */ 2087 2057 static void blk_mq_queue_reinit(struct request_queue *q) 2088 2058 { 2089 - WARN_ON_ONCE(!q->mq_freeze_depth); 2059 + WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); 2090 2060 2091 2061 blk_mq_sysfs_unregister(q); 2092 2062 ··· 2203 2173 return 0; 2204 2174 } 2205 2175 2176 + struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags) 2177 + { 2178 + return tags->cpumask; 2179 + } 2180 + EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask); 2181 + 2206 2182 /* 2207 2183 * Alloc a tag set to be associated with one or more request queues. 2208 2184 * May fail with EINVAL for various error conditions. May adjust the ··· 2270 2234 int i; 2271 2235 2272 2236 for (i = 0; i < set->nr_hw_queues; i++) { 2273 - if (set->tags[i]) 2237 + if (set->tags[i]) { 2274 2238 blk_mq_free_rq_map(set, set->tags[i], i); 2239 + free_cpumask_var(set->tags[i]->cpumask); 2240 + } 2275 2241 } 2276 2242 2277 2243 kfree(set->tags);

+2 -3

block/blk.h

··· 78 78 bool bio_attempt_back_merge(struct request_queue *q, struct request *req, 79 79 struct bio *bio); 80 80 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, 81 - unsigned int *request_count); 81 + unsigned int *request_count, 82 + struct request **same_queue_rq); 82 83 83 84 void blk_account_io_start(struct request *req, bool new_io); 84 85 void blk_account_io_completion(struct request *req, unsigned int bytes); ··· 193 192 int blk_try_merge(struct request *rq, struct bio *bio); 194 193 195 194 void blk_queue_congestion_threshold(struct request_queue *q); 196 - 197 - void __blk_run_queue_uncond(struct request_queue *q); 198 195 199 196 int blk_dev_init(void); 200 197

-3

block/bounce.c

··· 128 128 struct bio_vec *bvec, *org_vec; 129 129 int i; 130 130 131 - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) 132 - set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); 133 - 134 131 /* 135 132 * free up bounce indirect pages used 136 133 */

+104 -21

block/cfq-iosched.c

··· 67 67 #define sample_valid(samples) ((samples) > 80) 68 68 #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) 69 69 70 + /* blkio-related constants */ 71 + #define CFQ_WEIGHT_MIN 10 72 + #define CFQ_WEIGHT_MAX 1000 73 + #define CFQ_WEIGHT_DEFAULT 500 74 + 70 75 struct cfq_ttime { 71 76 unsigned long last_end_request; 72 77 ··· 215 210 uint16_t flags; 216 211 #endif /* CONFIG_DEBUG_BLK_CGROUP */ 217 212 #endif /* CONFIG_CFQ_GROUP_IOSCHED */ 213 + }; 214 + 215 + /* Per-cgroup data */ 216 + struct cfq_group_data { 217 + /* must be the first member */ 218 + struct blkcg_policy_data pd; 219 + 220 + unsigned int weight; 221 + unsigned int leaf_weight; 218 222 }; 219 223 220 224 /* This is per cgroup per device grouping structure */ ··· 460 446 CFQ_CFQQ_FNS(wait_busy); 461 447 #undef CFQ_CFQQ_FNS 462 448 463 - static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) 464 - { 465 - return pd ? container_of(pd, struct cfq_group, pd) : NULL; 466 - } 467 - 468 - static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) 469 - { 470 - return pd_to_blkg(&cfqg->pd); 471 - } 472 - 473 449 #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) 474 450 475 451 /* cfqg stats flags */ ··· 604 600 605 601 #ifdef CONFIG_CFQ_GROUP_IOSCHED 606 602 603 + static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) 604 + { 605 + return pd ? container_of(pd, struct cfq_group, pd) : NULL; 606 + } 607 + 608 + static struct cfq_group_data 609 + *cpd_to_cfqgd(struct blkcg_policy_data *cpd) 610 + { 611 + return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL; 612 + } 613 + 614 + static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) 615 + { 616 + return pd_to_blkg(&cfqg->pd); 617 + } 618 + 607 619 static struct blkcg_policy blkcg_policy_cfq; 608 620 609 621 static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) 610 622 { 611 623 return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); 624 + } 625 + 626 + static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg) 627 + { 628 + return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq)); 612 629 } 613 630 614 631 static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) ··· 1569 1544 #endif 1570 1545 } 1571 1546 1547 + static void cfq_cpd_init(const struct blkcg *blkcg) 1548 + { 1549 + struct cfq_group_data *cgd = 1550 + cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]); 1551 + 1552 + if (blkcg == &blkcg_root) { 1553 + cgd->weight = 2 * CFQ_WEIGHT_DEFAULT; 1554 + cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; 1555 + } else { 1556 + cgd->weight = CFQ_WEIGHT_DEFAULT; 1557 + cgd->leaf_weight = CFQ_WEIGHT_DEFAULT; 1558 + } 1559 + } 1560 + 1572 1561 static void cfq_pd_init(struct blkcg_gq *blkg) 1573 1562 { 1574 1563 struct cfq_group *cfqg = blkg_to_cfqg(blkg); 1564 + struct cfq_group_data *cgd = blkcg_to_cfqgd(blkg->blkcg); 1575 1565 1576 1566 cfq_init_cfqg_base(cfqg); 1577 - cfqg->weight = blkg->blkcg->cfq_weight; 1578 - cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; 1567 + cfqg->weight = cgd->weight; 1568 + cfqg->leaf_weight = cgd->leaf_weight; 1579 1569 cfqg_stats_init(&cfqg->stats); 1580 1570 cfqg_stats_init(&cfqg->dead_stats); 1581 1571 } ··· 1713 1673 1714 1674 static int cfq_print_weight(struct seq_file *sf, void *v) 1715 1675 { 1716 - seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); 1676 + struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); 1677 + struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg); 1678 + unsigned int val = 0; 1679 + 1680 + if (cgd) 1681 + val = cgd->weight; 1682 + 1683 + seq_printf(sf, "%u\n", val); 1717 1684 return 0; 1718 1685 } 1719 1686 1720 1687 static int cfq_print_leaf_weight(struct seq_file *sf, void *v) 1721 1688 { 1722 - seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); 1689 + struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); 1690 + struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg); 1691 + unsigned int val = 0; 1692 + 1693 + if (cgd) 1694 + val = cgd->leaf_weight; 1695 + 1696 + seq_printf(sf, "%u\n", val); 1723 1697 return 0; 1724 1698 } 1725 1699 ··· 1744 1690 struct blkcg *blkcg = css_to_blkcg(of_css(of)); 1745 1691 struct blkg_conf_ctx ctx; 1746 1692 struct cfq_group *cfqg; 1693 + struct cfq_group_data *cfqgd; 1747 1694 int ret; 1748 1695 1749 1696 ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx); ··· 1753 1698 1754 1699 ret = -EINVAL; 1755 1700 cfqg = blkg_to_cfqg(ctx.blkg); 1701 + cfqgd = blkcg_to_cfqgd(blkcg); 1702 + if (!cfqg || !cfqgd) 1703 + goto err; 1704 + 1756 1705 if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { 1757 1706 if (!is_leaf_weight) { 1758 1707 cfqg->dev_weight = ctx.v; 1759 - cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; 1708 + cfqg->new_weight = ctx.v ?: cfqgd->weight; 1760 1709 } else { 1761 1710 cfqg->dev_leaf_weight = ctx.v; 1762 - cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; 1711 + cfqg->new_leaf_weight = ctx.v ?: cfqgd->leaf_weight; 1763 1712 } 1764 1713 ret = 0; 1765 1714 } 1766 1715 1716 + err: 1767 1717 blkg_conf_finish(&ctx); 1768 1718 return ret ?: nbytes; 1769 1719 } ··· 1790 1730 { 1791 1731 struct blkcg *blkcg = css_to_blkcg(css); 1792 1732 struct blkcg_gq *blkg; 1733 + struct cfq_group_data *cfqgd; 1734 + int ret = 0; 1793 1735 1794 1736 if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) 1795 1737 return -EINVAL; 1796 1738 1797 1739 spin_lock_irq(&blkcg->lock); 1740 + cfqgd = blkcg_to_cfqgd(blkcg); 1741 + if (!cfqgd) { 1742 + ret = -EINVAL; 1743 + goto out; 1744 + } 1798 1745 1799 1746 if (!is_leaf_weight) 1800 - blkcg->cfq_weight = val; 1747 + cfqgd->weight = val; 1801 1748 else 1802 - blkcg->cfq_leaf_weight = val; 1749 + cfqgd->leaf_weight = val; 1803 1750 1804 1751 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { 1805 1752 struct cfq_group *cfqg = blkg_to_cfqg(blkg); ··· 1816 1749 1817 1750 if (!is_leaf_weight) { 1818 1751 if (!cfqg->dev_weight) 1819 - cfqg->new_weight = blkcg->cfq_weight; 1752 + cfqg->new_weight = cfqgd->weight; 1820 1753 } else { 1821 1754 if (!cfqg->dev_leaf_weight) 1822 - cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; 1755 + cfqg->new_leaf_weight = cfqgd->leaf_weight; 1823 1756 } 1824 1757 } 1825 1758 1759 + out: 1826 1760 spin_unlock_irq(&blkcg->lock); 1827 - return 0; 1761 + return ret; 1828 1762 } 1829 1763 1830 1764 static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, ··· 4545 4477 return ret; 4546 4478 } 4547 4479 4480 + static void cfq_registered_queue(struct request_queue *q) 4481 + { 4482 + struct elevator_queue *e = q->elevator; 4483 + struct cfq_data *cfqd = e->elevator_data; 4484 + 4485 + /* 4486 + * Default to IOPS mode with no idling for SSDs 4487 + */ 4488 + if (blk_queue_nonrot(q)) 4489 + cfqd->cfq_slice_idle = 0; 4490 + } 4491 + 4548 4492 /* 4549 4493 * sysfs parts below --> 4550 4494 */ ··· 4672 4592 .elevator_may_queue_fn = cfq_may_queue, 4673 4593 .elevator_init_fn = cfq_init_queue, 4674 4594 .elevator_exit_fn = cfq_exit_queue, 4595 + .elevator_registered_fn = cfq_registered_queue, 4675 4596 }, 4676 4597 .icq_size = sizeof(struct cfq_io_cq), 4677 4598 .icq_align = __alignof__(struct cfq_io_cq), ··· 4684 4603 #ifdef CONFIG_CFQ_GROUP_IOSCHED 4685 4604 static struct blkcg_policy blkcg_policy_cfq = { 4686 4605 .pd_size = sizeof(struct cfq_group), 4606 + .cpd_size = sizeof(struct cfq_group_data), 4687 4607 .cftypes = cfq_blkcg_files, 4688 4608 4609 + .cpd_init_fn = cfq_cpd_init, 4689 4610 .pd_init_fn = cfq_pd_init, 4690 4611 .pd_offline_fn = cfq_pd_offline, 4691 4612 .pd_reset_stats_fn = cfq_pd_reset_stats,

+2

block/elevator.c

··· 806 806 } 807 807 kobject_uevent(&e->kobj, KOBJ_ADD); 808 808 e->registered = 1; 809 + if (e->type->ops.elevator_registered_fn) 810 + e->type->ops.elevator_registered_fn(q); 809 811 } 810 812 return error; 811 813 }

+32 -5

block/ioctl.c

··· 150 150 } 151 151 } 152 152 153 - static int blkdev_reread_part(struct block_device *bdev) 153 + /* 154 + * This is an exported API for the block driver, and will not 155 + * acquire bd_mutex. This API should be used in case that 156 + * caller has held bd_mutex already. 157 + */ 158 + int __blkdev_reread_part(struct block_device *bdev) 154 159 { 155 160 struct gendisk *disk = bdev->bd_disk; 156 - int res; 157 161 158 162 if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) 159 163 return -EINVAL; 160 164 if (!capable(CAP_SYS_ADMIN)) 161 165 return -EACCES; 162 - if (!mutex_trylock(&bdev->bd_mutex)) 163 - return -EBUSY; 164 - res = rescan_partitions(disk, bdev); 166 + 167 + lockdep_assert_held(&bdev->bd_mutex); 168 + 169 + return rescan_partitions(disk, bdev); 170 + } 171 + EXPORT_SYMBOL(__blkdev_reread_part); 172 + 173 + /* 174 + * This is an exported API for the block driver, and will 175 + * try to acquire bd_mutex. If bd_mutex has been held already 176 + * in current context, please call __blkdev_reread_part(). 177 + * 178 + * Make sure the held locks in current context aren't required 179 + * in open()/close() handler and I/O path for avoiding ABBA deadlock: 180 + * - bd_mutex is held before calling block driver's open/close 181 + * handler 182 + * - reading partition table may submit I/O to the block device 183 + */ 184 + int blkdev_reread_part(struct block_device *bdev) 185 + { 186 + int res; 187 + 188 + mutex_lock(&bdev->bd_mutex); 189 + res = __blkdev_reread_part(bdev); 165 190 mutex_unlock(&bdev->bd_mutex); 191 + 166 192 return res; 167 193 } 194 + EXPORT_SYMBOL(blkdev_reread_part); 168 195 169 196 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, 170 197 uint64_t len, int secure)

+24 -26

drivers/block/nbd.c

··· 230 230 int result, flags; 231 231 struct nbd_request request; 232 232 unsigned long size = blk_rq_bytes(req); 233 + u32 type; 234 + 235 + if (req->cmd_type == REQ_TYPE_DRV_PRIV) 236 + type = NBD_CMD_DISC; 237 + else if (req->cmd_flags & REQ_DISCARD) 238 + type = NBD_CMD_TRIM; 239 + else if (req->cmd_flags & REQ_FLUSH) 240 + type = NBD_CMD_FLUSH; 241 + else if (rq_data_dir(req) == WRITE) 242 + type = NBD_CMD_WRITE; 243 + else 244 + type = NBD_CMD_READ; 233 245 234 246 memset(&request, 0, sizeof(request)); 235 247 request.magic = htonl(NBD_REQUEST_MAGIC); 236 - request.type = htonl(nbd_cmd(req)); 237 - 238 - if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) { 248 + request.type = htonl(type); 249 + if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) { 239 250 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 240 251 request.len = htonl(size); 241 252 } 242 253 memcpy(request.handle, &req, sizeof(req)); 243 254 244 255 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", 245 - req, nbdcmd_to_ascii(nbd_cmd(req)), 256 + req, nbdcmd_to_ascii(type), 246 257 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); 247 258 result = sock_xmit(nbd, 1, &request, sizeof(request), 248 - (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); 259 + (type == NBD_CMD_WRITE) ? MSG_MORE : 0); 249 260 if (result <= 0) { 250 261 dev_err(disk_to_dev(nbd->disk), 251 262 "Send control failed (result %d)\n", result); 252 263 return -EIO; 253 264 } 254 265 255 - if (nbd_cmd(req) == NBD_CMD_WRITE) { 266 + if (type == NBD_CMD_WRITE) { 256 267 struct req_iterator iter; 257 268 struct bio_vec bvec; 258 269 /* ··· 363 352 } 364 353 365 354 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); 366 - if (nbd_cmd(req) == NBD_CMD_READ) { 355 + if (rq_data_dir(req) != WRITE) { 367 356 struct req_iterator iter; 368 357 struct bio_vec bvec; 369 358 ··· 463 452 if (req->cmd_type != REQ_TYPE_FS) 464 453 goto error_out; 465 454 466 - nbd_cmd(req) = NBD_CMD_READ; 467 - if (rq_data_dir(req) == WRITE) { 468 - if ((req->cmd_flags & REQ_DISCARD)) { 469 - WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM)); 470 - nbd_cmd(req) = NBD_CMD_TRIM; 471 - } else 472 - nbd_cmd(req) = NBD_CMD_WRITE; 473 - if (nbd->flags & NBD_FLAG_READ_ONLY) { 474 - dev_err(disk_to_dev(nbd->disk), 475 - "Write on read-only\n"); 476 - goto error_out; 477 - } 478 - } 479 - 480 - if (req->cmd_flags & REQ_FLUSH) { 481 - BUG_ON(unlikely(blk_rq_sectors(req))); 482 - nbd_cmd(req) = NBD_CMD_FLUSH; 455 + if (rq_data_dir(req) == WRITE && 456 + (nbd->flags & NBD_FLAG_READ_ONLY)) { 457 + dev_err(disk_to_dev(nbd->disk), 458 + "Write on read-only\n"); 459 + goto error_out; 483 460 } 484 461 485 462 req->errors = 0; ··· 591 592 fsync_bdev(bdev); 592 593 mutex_lock(&nbd->tx_lock); 593 594 blk_rq_init(NULL, &sreq); 594 - sreq.cmd_type = REQ_TYPE_SPECIAL; 595 - nbd_cmd(&sreq) = NBD_CMD_DISC; 595 + sreq.cmd_type = REQ_TYPE_DRV_PRIV; 596 596 597 597 /* Check again after getting mutex back. */ 598 598 if (!nbd->sock)

+2 -2

drivers/block/paride/pd.c

··· 442 442 443 443 static enum action do_pd_io_start(void) 444 444 { 445 - if (pd_req->cmd_type == REQ_TYPE_SPECIAL) { 445 + if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) { 446 446 phase = pd_special; 447 447 return pd_special(); 448 448 } ··· 725 725 if (IS_ERR(rq)) 726 726 return PTR_ERR(rq); 727 727 728 - rq->cmd_type = REQ_TYPE_SPECIAL; 728 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 729 729 rq->special = func; 730 730 731 731 err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);

+2 -2

drivers/block/sx8.c

··· 620 620 spin_unlock_irq(&host->lock); 621 621 622 622 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); 623 - crq->rq->cmd_type = REQ_TYPE_SPECIAL; 623 + crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; 624 624 crq->rq->special = crq; 625 625 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); 626 626 ··· 661 661 crq->msg_bucket = (u32) rc; 662 662 663 663 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); 664 - crq->rq->cmd_type = REQ_TYPE_SPECIAL; 664 + crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; 665 665 crq->rq->special = crq; 666 666 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); 667 667

+3 -3

drivers/block/virtio_blk.c

··· 124 124 req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); 125 125 req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); 126 126 req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); 127 - } else if (req->cmd_type == REQ_TYPE_SPECIAL) { 127 + } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) { 128 128 req->errors = (error != 0); 129 129 } 130 130 ··· 188 188 vbr->out_hdr.sector = 0; 189 189 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); 190 190 break; 191 - case REQ_TYPE_SPECIAL: 191 + case REQ_TYPE_DRV_PRIV: 192 192 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); 193 193 vbr->out_hdr.sector = 0; 194 194 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); ··· 251 251 return PTR_ERR(req); 252 252 } 253 253 254 - req->cmd_type = REQ_TYPE_SPECIAL; 254 + req->cmd_type = REQ_TYPE_DRV_PRIV; 255 255 err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 256 256 blk_put_request(req); 257 257

+5 -5

drivers/ide/ide-atapi.c

··· 93 93 int error; 94 94 95 95 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 96 - rq->cmd_type = REQ_TYPE_SPECIAL; 96 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 97 97 rq->special = (char *)pc; 98 98 99 99 if (buf && bufflen) { ··· 191 191 192 192 BUG_ON(sense_len > sizeof(*sense)); 193 193 194 - if (rq->cmd_type == REQ_TYPE_SENSE || drive->sense_rq_armed) 194 + if (rq->cmd_type == REQ_TYPE_ATA_SENSE || drive->sense_rq_armed) 195 195 return; 196 196 197 197 memset(sense, 0, sizeof(*sense)); ··· 210 210 sense_rq->rq_disk = rq->rq_disk; 211 211 sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; 212 212 sense_rq->cmd[4] = cmd_len; 213 - sense_rq->cmd_type = REQ_TYPE_SENSE; 213 + sense_rq->cmd_type = REQ_TYPE_ATA_SENSE; 214 214 sense_rq->cmd_flags |= REQ_PREEMPT; 215 215 216 216 if (drive->media == ide_tape) ··· 310 310 switch (rq->cmd_type) { 311 311 case REQ_TYPE_FS: 312 312 return 32768; 313 - case REQ_TYPE_SENSE: 313 + case REQ_TYPE_ATA_SENSE: 314 314 case REQ_TYPE_BLOCK_PC: 315 315 case REQ_TYPE_ATA_PC: 316 316 return blk_rq_bytes(rq); ··· 477 477 if (uptodate == 0) 478 478 drive->failed_pc = NULL; 479 479 480 - if (rq->cmd_type == REQ_TYPE_SPECIAL) { 480 + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { 481 481 rq->errors = 0; 482 482 error = 0; 483 483 } else {

+5 -5

drivers/ide/ide-cd.c

··· 210 210 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) 211 211 { 212 212 /* 213 - * For REQ_TYPE_SENSE, "rq->special" points to the original 213 + * For REQ_TYPE_ATA_SENSE, "rq->special" points to the original 214 214 * failed request. Also, the sense data should be read 215 215 * directly from rq which might be different from the original 216 216 * sense buffer if it got copied during mapping. ··· 285 285 "stat 0x%x", 286 286 rq->cmd[0], rq->cmd_type, err, stat); 287 287 288 - if (rq->cmd_type == REQ_TYPE_SENSE) { 288 + if (rq->cmd_type == REQ_TYPE_ATA_SENSE) { 289 289 /* 290 290 * We got an error trying to get sense info from the drive 291 291 * (probably while trying to recover from a former error). ··· 526 526 ide_expiry_t *expiry = NULL; 527 527 int dma_error = 0, dma, thislen, uptodate = 0; 528 528 int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0; 529 - int sense = (rq->cmd_type == REQ_TYPE_SENSE); 529 + int sense = (rq->cmd_type == REQ_TYPE_ATA_SENSE); 530 530 unsigned int timeout; 531 531 u16 len; 532 532 u8 ireason, stat; ··· 791 791 if (cdrom_start_rw(drive, rq) == ide_stopped) 792 792 goto out_end; 793 793 break; 794 - case REQ_TYPE_SENSE: 794 + case REQ_TYPE_ATA_SENSE: 795 795 case REQ_TYPE_BLOCK_PC: 796 796 case REQ_TYPE_ATA_PC: 797 797 if (!rq->timeout) ··· 799 799 800 800 cdrom_do_block_pc(drive, rq); 801 801 break; 802 - case REQ_TYPE_SPECIAL: 802 + case REQ_TYPE_DRV_PRIV: 803 803 /* right now this can only be a reset... */ 804 804 uptodate = 1; 805 805 goto out_end;

+1 -1

drivers/ide/ide-cd_ioctl.c

··· 304 304 int ret; 305 305 306 306 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 307 - rq->cmd_type = REQ_TYPE_SPECIAL; 307 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 308 308 rq->cmd_flags = REQ_QUIET; 309 309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); 310 310 blk_put_request(rq);

+1 -1

drivers/ide/ide-devsets.c

··· 166 166 return setting->set(drive, arg); 167 167 168 168 rq = blk_get_request(q, READ, __GFP_WAIT); 169 - rq->cmd_type = REQ_TYPE_SPECIAL; 169 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 170 170 rq->cmd_len = 5; 171 171 rq->cmd[0] = REQ_DEVSET_EXEC; 172 172 *(int *)&rq->cmd[1] = arg;

+2 -2

drivers/ide/ide-eh.c

··· 129 129 130 130 if (cmd) 131 131 ide_complete_cmd(drive, cmd, stat, err); 132 - } else if (blk_pm_request(rq)) { 132 + } else if (ata_pm_request(rq)) { 133 133 rq->errors = 1; 134 134 ide_complete_pm_rq(drive, rq); 135 135 return ide_stopped; ··· 147 147 { 148 148 struct request *rq = drive->hwif->rq; 149 149 150 - if (rq && rq->cmd_type == REQ_TYPE_SPECIAL && 150 + if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV && 151 151 rq->cmd[0] == REQ_DRIVE_RESET) { 152 152 if (err <= 0 && rq->errors == 0) 153 153 rq->errors = -EIO;

+4 -4

drivers/ide/ide-floppy.c

··· 97 97 "Aborting request!\n"); 98 98 } 99 99 100 - if (rq->cmd_type == REQ_TYPE_SPECIAL) 100 + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) 101 101 rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; 102 102 103 103 return uptodate; ··· 246 246 } else 247 247 printk(KERN_ERR PFX "%s: I/O error\n", drive->name); 248 248 249 - if (rq->cmd_type == REQ_TYPE_SPECIAL) { 249 + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { 250 250 rq->errors = 0; 251 251 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 252 252 return ide_stopped; ··· 265 265 pc = &floppy->queued_pc; 266 266 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); 267 267 break; 268 - case REQ_TYPE_SPECIAL: 269 - case REQ_TYPE_SENSE: 268 + case REQ_TYPE_DRV_PRIV: 269 + case REQ_TYPE_ATA_SENSE: 270 270 pc = (struct ide_atapi_pc *)rq->special; 271 271 break; 272 272 case REQ_TYPE_BLOCK_PC:

+6 -6

drivers/ide/ide-io.c

··· 135 135 136 136 void ide_kill_rq(ide_drive_t *drive, struct request *rq) 137 137 { 138 - u8 drv_req = (rq->cmd_type == REQ_TYPE_SPECIAL) && rq->rq_disk; 138 + u8 drv_req = (rq->cmd_type == REQ_TYPE_DRV_PRIV) && rq->rq_disk; 139 139 u8 media = drive->media; 140 140 141 141 drive->failed_pc = NULL; ··· 320 320 goto kill_rq; 321 321 } 322 322 323 - if (blk_pm_request(rq)) 323 + if (ata_pm_request(rq)) 324 324 ide_check_pm_state(drive, rq); 325 325 326 326 drive->hwif->tp_ops->dev_select(drive); ··· 342 342 343 343 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) 344 344 return execute_drive_cmd(drive, rq); 345 - else if (blk_pm_request(rq)) { 346 - struct request_pm_state *pm = rq->special; 345 + else if (ata_pm_request(rq)) { 346 + struct ide_pm_state *pm = rq->special; 347 347 #ifdef DEBUG_PM 348 348 printk("%s: start_power_step(step: %d)\n", 349 349 drive->name, pm->pm_step); ··· 353 353 pm->pm_step == IDE_PM_COMPLETED) 354 354 ide_complete_pm_rq(drive, rq); 355 355 return startstop; 356 - } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_SPECIAL) 356 + } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_DRV_PRIV) 357 357 /* 358 358 * TODO: Once all ULDs have been modified to 359 359 * check for specific op codes rather than ··· 538 538 * state machine. 539 539 */ 540 540 if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && 541 - blk_pm_request(rq) == 0 && 541 + ata_pm_request(rq) == 0 && 542 542 (rq->cmd_flags & REQ_PREEMPT) == 0) { 543 543 /* there should be no pending command at this point */ 544 544 ide_unlock_port(hwif);

+1 -1

drivers/ide/ide-ioctls.c

··· 222 222 int ret = 0; 223 223 224 224 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 225 - rq->cmd_type = REQ_TYPE_SPECIAL; 225 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 226 226 rq->cmd_len = 1; 227 227 rq->cmd[0] = REQ_DRIVE_RESET; 228 228 if (blk_execute_rq(drive->queue, NULL, rq, 1))

+2 -2

drivers/ide/ide-park.c

··· 34 34 rq = blk_get_request(q, READ, __GFP_WAIT); 35 35 rq->cmd[0] = REQ_PARK_HEADS; 36 36 rq->cmd_len = 1; 37 - rq->cmd_type = REQ_TYPE_SPECIAL; 37 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 38 38 rq->special = &timeout; 39 39 rc = blk_execute_rq(q, NULL, rq, 1); 40 40 blk_put_request(rq); ··· 51 51 52 52 rq->cmd[0] = REQ_UNPARK_HEADS; 53 53 rq->cmd_len = 1; 54 - rq->cmd_type = REQ_TYPE_SPECIAL; 54 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 55 55 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); 56 56 57 57 out:

+43 -13

drivers/ide/ide-pm.c

··· 8 8 ide_drive_t *pair = ide_get_pair_dev(drive); 9 9 ide_hwif_t *hwif = drive->hwif; 10 10 struct request *rq; 11 - struct request_pm_state rqpm; 11 + struct ide_pm_state rqpm; 12 12 int ret; 13 13 14 14 if (ide_port_acpi(hwif)) { ··· 19 19 20 20 memset(&rqpm, 0, sizeof(rqpm)); 21 21 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 22 - rq->cmd_type = REQ_TYPE_PM_SUSPEND; 22 + rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; 23 23 rq->special = &rqpm; 24 24 rqpm.pm_step = IDE_PM_START_SUSPEND; 25 25 if (mesg.event == PM_EVENT_PRETHAW) ··· 38 38 return ret; 39 39 } 40 40 41 + static void ide_end_sync_rq(struct request *rq, int error) 42 + { 43 + complete(rq->end_io_data); 44 + } 45 + 46 + static int ide_pm_execute_rq(struct request *rq) 47 + { 48 + struct request_queue *q = rq->q; 49 + DECLARE_COMPLETION_ONSTACK(wait); 50 + 51 + rq->end_io_data = &wait; 52 + rq->end_io = ide_end_sync_rq; 53 + 54 + spin_lock_irq(q->queue_lock); 55 + if (unlikely(blk_queue_dying(q))) { 56 + rq->cmd_flags |= REQ_QUIET; 57 + rq->errors = -ENXIO; 58 + __blk_end_request_all(rq, rq->errors); 59 + spin_unlock_irq(q->queue_lock); 60 + return -ENXIO; 61 + } 62 + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); 63 + __blk_run_queue_uncond(q); 64 + spin_unlock_irq(q->queue_lock); 65 + 66 + wait_for_completion_io(&wait); 67 + 68 + return rq->errors ? -EIO : 0; 69 + } 70 + 41 71 int generic_ide_resume(struct device *dev) 42 72 { 43 73 ide_drive_t *drive = to_ide_device(dev); 44 74 ide_drive_t *pair = ide_get_pair_dev(drive); 45 75 ide_hwif_t *hwif = drive->hwif; 46 76 struct request *rq; 47 - struct request_pm_state rqpm; 77 + struct ide_pm_state rqpm; 48 78 int err; 49 79 50 80 if (ide_port_acpi(hwif)) { ··· 89 59 90 60 memset(&rqpm, 0, sizeof(rqpm)); 91 61 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 92 - rq->cmd_type = REQ_TYPE_PM_RESUME; 62 + rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; 93 63 rq->cmd_flags |= REQ_PREEMPT; 94 64 rq->special = &rqpm; 95 65 rqpm.pm_step = IDE_PM_START_RESUME; 96 66 rqpm.pm_state = PM_EVENT_ON; 97 67 98 - err = blk_execute_rq(drive->queue, NULL, rq, 1); 68 + err = ide_pm_execute_rq(rq); 99 69 blk_put_request(rq); 100 70 101 71 if (err == 0 && dev->driver) { ··· 110 80 111 81 void ide_complete_power_step(ide_drive_t *drive, struct request *rq) 112 82 { 113 - struct request_pm_state *pm = rq->special; 83 + struct ide_pm_state *pm = rq->special; 114 84 115 85 #ifdef DEBUG_PM 116 86 printk(KERN_INFO "%s: complete_power_step(step: %d)\n", ··· 140 110 141 111 ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) 142 112 { 143 - struct request_pm_state *pm = rq->special; 113 + struct ide_pm_state *pm = rq->special; 144 114 struct ide_cmd cmd = { }; 145 115 146 116 switch (pm->pm_step) { ··· 212 182 void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) 213 183 { 214 184 struct request_queue *q = drive->queue; 215 - struct request_pm_state *pm = rq->special; 185 + struct ide_pm_state *pm = rq->special; 216 186 unsigned long flags; 217 187 218 188 ide_complete_power_step(drive, rq); ··· 221 191 222 192 #ifdef DEBUG_PM 223 193 printk("%s: completing PM request, %s\n", drive->name, 224 - (rq->cmd_type == REQ_TYPE_PM_SUSPEND) ? "suspend" : "resume"); 194 + (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) ? "suspend" : "resume"); 225 195 #endif 226 196 spin_lock_irqsave(q->queue_lock, flags); 227 - if (rq->cmd_type == REQ_TYPE_PM_SUSPEND) 197 + if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) 228 198 blk_stop_queue(q); 229 199 else 230 200 drive->dev_flags &= ~IDE_DFLAG_BLOCKED; ··· 238 208 239 209 void ide_check_pm_state(ide_drive_t *drive, struct request *rq) 240 210 { 241 - struct request_pm_state *pm = rq->special; 211 + struct ide_pm_state *pm = rq->special; 242 212 243 - if (rq->cmd_type == REQ_TYPE_PM_SUSPEND && 213 + if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND && 244 214 pm->pm_step == IDE_PM_START_SUSPEND) 245 215 /* Mark drive blocked when starting the suspend sequence. */ 246 216 drive->dev_flags |= IDE_DFLAG_BLOCKED; 247 - else if (rq->cmd_type == REQ_TYPE_PM_RESUME && 217 + else if (rq->cmd_type == REQ_TYPE_ATA_PM_RESUME && 248 218 pm->pm_step == IDE_PM_START_RESUME) { 249 219 /* 250 220 * The first thing we do on wakeup is to wait for BSY bit to

+3 -3

drivers/ide/ide-tape.c

··· 576 576 rq->cmd[0], (unsigned long long)blk_rq_pos(rq), 577 577 blk_rq_sectors(rq)); 578 578 579 - BUG_ON(!(rq->cmd_type == REQ_TYPE_SPECIAL || 580 - rq->cmd_type == REQ_TYPE_SENSE)); 579 + BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV || 580 + rq->cmd_type == REQ_TYPE_ATA_SENSE)); 581 581 582 582 /* Retry a failed packet command */ 583 583 if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) { ··· 853 853 BUG_ON(size < 0 || size % tape->blk_size); 854 854 855 855 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 856 - rq->cmd_type = REQ_TYPE_SPECIAL; 856 + rq->cmd_type = REQ_TYPE_DRV_PRIV; 857 857 rq->cmd[13] = cmd; 858 858 rq->rq_disk = tape->disk; 859 859 rq->__sector = tape->first_frame;

+1 -1

drivers/ide/ide-taskfile.c

··· 186 186 tf->command == ATA_CMD_CHK_POWER) { 187 187 struct request *rq = hwif->rq; 188 188 189 - if (blk_pm_request(rq)) 189 + if (ata_pm_request(rq)) 190 190 ide_complete_pm_rq(drive, rq); 191 191 else 192 192 ide_finish_cmd(drive, cmd, stat);

+1 -1

drivers/md/bcache/io.c

··· 55 55 56 56 s->bio->bi_end_io = s->bi_end_io; 57 57 s->bio->bi_private = s->bi_private; 58 - bio_endio_nodec(s->bio, 0); 58 + bio_endio(s->bio, 0); 59 59 60 60 closure_debug_destroy(&s->cl); 61 61 mempool_free(s, s->p->bio_split_hook);

+1 -1

drivers/md/bcache/request.c

··· 619 619 bio->bi_end_io = request_endio; 620 620 bio->bi_private = &s->cl; 621 621 622 - atomic_set(&bio->bi_cnt, 3); 622 + bio_cnt_set(bio, 3); 623 623 } 624 624 625 625 static void search_free(struct closure *cl)

-6

drivers/md/dm-cache-target.c

··· 86 86 { 87 87 bio->bi_end_io = h->bi_end_io; 88 88 bio->bi_private = h->bi_private; 89 - 90 - /* 91 - * Must bump bi_remaining to allow bio to complete with 92 - * restored bi_end_io. 93 - */ 94 - atomic_inc(&bio->bi_remaining); 95 89 } 96 90 97 91 /*----------------------------------------------------------------*/

-2

drivers/md/dm-raid1.c

··· 1254 1254 dm_bio_restore(bd, bio); 1255 1255 bio_record->details.bi_bdev = NULL; 1256 1256 1257 - atomic_inc(&bio->bi_remaining); 1258 - 1259 1257 queue_bio(ms, bio, rw); 1260 1258 return DM_ENDIO_INCOMPLETE; 1261 1259 }

-1

drivers/md/dm-snap.c

··· 1478 1478 if (full_bio) { 1479 1479 full_bio->bi_end_io = pe->full_bio_end_io; 1480 1480 full_bio->bi_private = pe->full_bio_private; 1481 - atomic_inc(&full_bio->bi_remaining); 1482 1481 } 1483 1482 increment_pending_exceptions_done_count(); 1484 1483

+16 -9

drivers/md/dm-table.c

··· 942 942 { 943 943 unsigned type = dm_table_get_type(t); 944 944 unsigned per_bio_data_size = 0; 945 - struct dm_target *tgt; 946 945 unsigned i; 947 946 948 - if (unlikely(type == DM_TYPE_NONE)) { 947 + switch (type) { 948 + case DM_TYPE_BIO_BASED: 949 + for (i = 0; i < t->num_targets; i++) { 950 + struct dm_target *tgt = t->targets + i; 951 + 952 + per_bio_data_size = max(per_bio_data_size, 953 + tgt->per_bio_data_size); 954 + } 955 + t->mempools = dm_alloc_bio_mempools(t->integrity_supported, 956 + per_bio_data_size); 957 + break; 958 + case DM_TYPE_REQUEST_BASED: 959 + case DM_TYPE_MQ_REQUEST_BASED: 960 + t->mempools = dm_alloc_rq_mempools(md, type); 961 + break; 962 + default: 949 963 DMWARN("no table type is set, can't allocate mempools"); 950 964 return -EINVAL; 951 965 } 952 966 953 - if (type == DM_TYPE_BIO_BASED) 954 - for (i = 0; i < t->num_targets; i++) { 955 - tgt = t->targets + i; 956 - per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); 957 - } 958 - 959 - t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size); 960 967 if (!t->mempools) 961 968 return -ENOMEM; 962 969

+3 -6

drivers/md/dm-thin.c

··· 793 793 794 794 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) 795 795 { 796 - if (m->bio) { 796 + if (m->bio) 797 797 m->bio->bi_end_io = m->saved_bi_end_io; 798 - atomic_inc(&m->bio->bi_remaining); 799 - } 798 + 800 799 cell_error(m->tc->pool, m->cell); 801 800 list_del(&m->list); 802 801 mempool_free(m, m->tc->pool->mapping_pool); ··· 809 810 int r; 810 811 811 812 bio = m->bio; 812 - if (bio) { 813 + if (bio) 813 814 bio->bi_end_io = m->saved_bi_end_io; 814 - atomic_inc(&bio->bi_remaining); 815 - } 816 815 817 816 if (m->err) { 818 817 cell_error(pool, m->cell);

+1 -1

drivers/md/dm-verity.c

··· 459 459 bio->bi_end_io = io->orig_bi_end_io; 460 460 bio->bi_private = io->orig_bi_private; 461 461 462 - bio_endio_nodec(bio, error); 462 + bio_endio(bio, error); 463 463 } 464 464 465 465 static void verity_work(struct work_struct *w)

+40 -131

drivers/md/dm.c

··· 990 990 dec_pending(io, error); 991 991 } 992 992 993 - /* 994 - * Partial completion handling for request-based dm 995 - */ 996 - static void end_clone_bio(struct bio *clone, int error) 997 - { 998 - struct dm_rq_clone_bio_info *info = 999 - container_of(clone, struct dm_rq_clone_bio_info, clone); 1000 - struct dm_rq_target_io *tio = info->tio; 1001 - struct bio *bio = info->orig; 1002 - unsigned int nr_bytes = info->orig->bi_iter.bi_size; 1003 - 1004 - bio_put(clone); 1005 - 1006 - if (tio->error) 1007 - /* 1008 - * An error has already been detected on the request. 1009 - * Once error occurred, just let clone->end_io() handle 1010 - * the remainder. 1011 - */ 1012 - return; 1013 - else if (error) { 1014 - /* 1015 - * Don't notice the error to the upper layer yet. 1016 - * The error handling decision is made by the target driver, 1017 - * when the request is completed. 1018 - */ 1019 - tio->error = error; 1020 - return; 1021 - } 1022 - 1023 - /* 1024 - * I/O for the bio successfully completed. 1025 - * Notice the data completion to the upper layer. 1026 - */ 1027 - 1028 - /* 1029 - * bios are processed from the head of the list. 1030 - * So the completing bio should always be rq->bio. 1031 - * If it's not, something wrong is happening. 1032 - */ 1033 - if (tio->orig->bio != bio) 1034 - DMERR("bio completion is going in the middle of the request"); 1035 - 1036 - /* 1037 - * Update the original request. 1038 - * Do not use blk_end_request() here, because it may complete 1039 - * the original request before the clone, and break the ordering. 1040 - */ 1041 - blk_update_request(tio->orig, 0, nr_bytes); 1042 - } 1043 - 1044 993 static struct dm_rq_target_io *tio_from_request(struct request *rq) 1045 994 { 1046 995 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); ··· 1035 1086 { 1036 1087 struct dm_rq_target_io *tio = clone->end_io_data; 1037 1088 struct mapped_device *md = tio->md; 1038 - 1039 - blk_rq_unprep_clone(clone); 1040 1089 1041 1090 if (md->type == DM_TYPE_MQ_REQUEST_BASED) 1042 1091 /* stacked on blk-mq queue(s) */ ··· 1774 1827 dm_complete_request(rq, r); 1775 1828 } 1776 1829 1777 - static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1778 - void *data) 1830 + static void setup_clone(struct request *clone, struct request *rq, 1831 + struct dm_rq_target_io *tio) 1779 1832 { 1780 - struct dm_rq_target_io *tio = data; 1781 - struct dm_rq_clone_bio_info *info = 1782 - container_of(bio, struct dm_rq_clone_bio_info, clone); 1783 - 1784 - info->orig = bio_orig; 1785 - info->tio = tio; 1786 - bio->bi_end_io = end_clone_bio; 1787 - 1788 - return 0; 1789 - } 1790 - 1791 - static int setup_clone(struct request *clone, struct request *rq, 1792 - struct dm_rq_target_io *tio, gfp_t gfp_mask) 1793 - { 1794 - int r; 1795 - 1796 - r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, 1797 - dm_rq_bio_constructor, tio); 1798 - if (r) 1799 - return r; 1800 - 1801 - clone->cmd = rq->cmd; 1802 - clone->cmd_len = rq->cmd_len; 1803 - clone->sense = rq->sense; 1833 + blk_rq_prep_clone(clone, rq); 1804 1834 clone->end_io = end_clone_request; 1805 1835 clone->end_io_data = tio; 1806 - 1807 1836 tio->clone = clone; 1808 - 1809 - return 0; 1810 1837 } 1811 1838 1812 1839 static struct request *clone_rq(struct request *rq, struct mapped_device *md, ··· 1801 1880 clone = tio->clone; 1802 1881 1803 1882 blk_rq_init(NULL, clone); 1804 - if (setup_clone(clone, rq, tio, gfp_mask)) { 1805 - /* -ENOMEM */ 1806 - if (alloc_clone) 1807 - free_clone_request(md, clone); 1808 - return NULL; 1809 - } 1883 + setup_clone(clone, rq, tio); 1810 1884 1811 1885 return clone; 1812 1886 } ··· 1895 1979 } 1896 1980 if (r != DM_MAPIO_REMAPPED) 1897 1981 return r; 1898 - if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { 1899 - /* -ENOMEM */ 1900 - ti->type->release_clone_rq(clone); 1901 - return DM_MAPIO_REQUEUE; 1902 - } 1982 + setup_clone(clone, rq, tio); 1903 1983 } 1904 1984 1905 1985 switch (r) { ··· 2348 2436 */ 2349 2437 goto out; 2350 2438 } 2351 - 2352 - BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); 2353 2439 2354 2440 md->io_pool = p->io_pool; 2355 2441 p->io_pool = NULL; ··· 3454 3544 } 3455 3545 EXPORT_SYMBOL_GPL(dm_noflush_suspending); 3456 3546 3457 - struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, 3458 - unsigned integrity, unsigned per_bio_data_size) 3547 + struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, 3548 + unsigned per_bio_data_size) 3459 3549 { 3460 - struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); 3461 - struct kmem_cache *cachep = NULL; 3462 - unsigned int pool_size = 0; 3550 + struct dm_md_mempools *pools; 3551 + unsigned int pool_size = dm_get_reserved_bio_based_ios(); 3463 3552 unsigned int front_pad; 3464 3553 3554 + pools = kzalloc(sizeof(*pools), GFP_KERNEL); 3465 3555 if (!pools) 3466 3556 return NULL; 3467 3557 3468 - type = filter_md_type(type, md); 3558 + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + 3559 + offsetof(struct dm_target_io, clone); 3469 3560 3470 - switch (type) { 3471 - case DM_TYPE_BIO_BASED: 3472 - cachep = _io_cache; 3473 - pool_size = dm_get_reserved_bio_based_ios(); 3474 - front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 3475 - break; 3476 - case DM_TYPE_REQUEST_BASED: 3477 - cachep = _rq_tio_cache; 3478 - pool_size = dm_get_reserved_rq_based_ios(); 3479 - pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); 3480 - if (!pools->rq_pool) 3481 - goto out; 3482 - /* fall through to setup remaining rq-based pools */ 3483 - case DM_TYPE_MQ_REQUEST_BASED: 3484 - if (!pool_size) 3485 - pool_size = dm_get_reserved_rq_based_ios(); 3486 - front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 3487 - /* per_bio_data_size is not used. See __bind_mempools(). */ 3488 - WARN_ON(per_bio_data_size != 0); 3489 - break; 3490 - default: 3491 - BUG(); 3492 - } 3493 - 3494 - if (cachep) { 3495 - pools->io_pool = mempool_create_slab_pool(pool_size, cachep); 3496 - if (!pools->io_pool) 3497 - goto out; 3498 - } 3561 + pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache); 3562 + if (!pools->io_pool) 3563 + goto out; 3499 3564 3500 3565 pools->bs = bioset_create_nobvec(pool_size, front_pad); 3501 3566 if (!pools->bs) ··· 3480 3595 goto out; 3481 3596 3482 3597 return pools; 3483 - 3484 3598 out: 3485 3599 dm_free_md_mempools(pools); 3600 + return NULL; 3601 + } 3486 3602 3603 + struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, 3604 + unsigned type) 3605 + { 3606 + unsigned int pool_size = dm_get_reserved_rq_based_ios(); 3607 + struct dm_md_mempools *pools; 3608 + 3609 + pools = kzalloc(sizeof(*pools), GFP_KERNEL); 3610 + if (!pools) 3611 + return NULL; 3612 + 3613 + if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) { 3614 + pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); 3615 + if (!pools->rq_pool) 3616 + goto out; 3617 + } 3618 + 3619 + pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache); 3620 + if (!pools->io_pool) 3621 + goto out; 3622 + 3623 + return pools; 3624 + out: 3625 + dm_free_md_mempools(pools); 3487 3626 return NULL; 3488 3627 } 3489 3628

+3 -2

drivers/md/dm.h

··· 222 222 /* 223 223 * Mempool operations 224 224 */ 225 - struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, 226 - unsigned integrity, unsigned per_bio_data_size); 225 + struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, 226 + unsigned per_bio_data_size); 227 + struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type); 227 228 void dm_free_md_mempools(struct dm_md_mempools *pools); 228 229 229 230 /*

+3 -10

fs/btrfs/disk-io.c

··· 1745 1745 bio->bi_private = end_io_wq->private; 1746 1746 bio->bi_end_io = end_io_wq->end_io; 1747 1747 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); 1748 - bio_endio_nodec(bio, error); 1748 + bio_endio(bio, error); 1749 1749 } 1750 1750 1751 1751 static int cleaner_kthread(void *arg) ··· 3269 3269 */ 3270 3270 static void btrfs_end_empty_barrier(struct bio *bio, int err) 3271 3271 { 3272 - if (err) { 3273 - if (err == -EOPNOTSUPP) 3274 - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 3272 + if (err) 3275 3273 clear_bit(BIO_UPTODATE, &bio->bi_flags); 3276 - } 3277 3274 if (bio->bi_private) 3278 3275 complete(bio->bi_private); 3279 3276 bio_put(bio); ··· 3298 3301 3299 3302 wait_for_completion(&device->flush_wait); 3300 3303 3301 - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 3302 - printk_in_rcu("BTRFS: disabling barriers on dev %s\n", 3303 - rcu_str_deref(device->name)); 3304 - device->nobarriers = 1; 3305 - } else if (!bio_flagged(bio, BIO_UPTODATE)) { 3304 + if (!bio_flagged(bio, BIO_UPTODATE)) { 3306 3305 ret = -EIO; 3307 3306 btrfs_dev_stat_inc_and_print(device, 3308 3307 BTRFS_DEV_STAT_FLUSH_ERRS);

-2

fs/btrfs/extent_io.c

··· 2767 2767 else 2768 2768 btrfsic_submit_bio(rw, bio); 2769 2769 2770 - if (bio_flagged(bio, BIO_EOPNOTSUPP)) 2771 - ret = -EOPNOTSUPP; 2772 2770 bio_put(bio); 2773 2771 return ret; 2774 2772 }

+6 -12

fs/btrfs/volumes.c

··· 345 345 waitqueue_active(&fs_info->async_submit_wait)) 346 346 wake_up(&fs_info->async_submit_wait); 347 347 348 - BUG_ON(atomic_read(&cur->bi_cnt) == 0); 348 + BUG_ON(atomic_read(&cur->__bi_cnt) == 0); 349 349 350 350 /* 351 351 * if we're doing the sync list, record that our ··· 5586 5586 5587 5587 static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) 5588 5588 { 5589 - if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) 5590 - bio_endio_nodec(bio, err); 5591 - else 5592 - bio_endio(bio, err); 5589 + bio->bi_private = bbio->private; 5590 + bio->bi_end_io = bbio->end_io; 5591 + bio_endio(bio, err); 5592 + 5593 5593 btrfs_put_bbio(bbio); 5594 5594 } 5595 5595 ··· 5633 5633 bio = bbio->orig_bio; 5634 5634 } 5635 5635 5636 - bio->bi_private = bbio->private; 5637 - bio->bi_end_io = bbio->end_io; 5638 5636 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5639 5637 /* only send an error to the higher layers if it is 5640 5638 * beyond the tolerance of the btrfs bio ··· 5814 5816 /* Shoud be the original bio. */ 5815 5817 WARN_ON(bio != bbio->orig_bio); 5816 5818 5817 - bio->bi_private = bbio->private; 5818 - bio->bi_end_io = bbio->end_io; 5819 5819 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5820 5820 bio->bi_iter.bi_sector = logical >> 9; 5821 5821 ··· 5894 5898 if (dev_nr < total_devs - 1) { 5895 5899 bio = btrfs_bio_clone(first_bio, GFP_NOFS); 5896 5900 BUG_ON(!bio); /* -ENOMEM */ 5897 - } else { 5901 + } else 5898 5902 bio = first_bio; 5899 - bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; 5900 - } 5901 5903 5902 5904 submit_stripe_bio(root, bbio, bio, 5903 5905 bbio->stripes[dev_nr].physical, dev_nr, rw,

-2

fs/btrfs/volumes.h

··· 292 292 struct btrfs_bio; 293 293 typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); 294 294 295 - #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) 296 - 297 295 struct btrfs_bio { 298 296 atomic_t refs; 299 297 atomic_t stripes_pending;

+1 -12

fs/buffer.c

··· 2938 2938 { 2939 2939 struct buffer_head *bh = bio->bi_private; 2940 2940 2941 - if (err == -EOPNOTSUPP) { 2942 - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 2943 - } 2944 - 2945 2941 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) 2946 2942 set_bit(BH_Quiet, &bh->b_state); 2947 2943 ··· 2996 3000 int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) 2997 3001 { 2998 3002 struct bio *bio; 2999 - int ret = 0; 3000 3003 3001 3004 BUG_ON(!buffer_locked(bh)); 3002 3005 BUG_ON(!buffer_mapped(bh)); ··· 3036 3041 if (buffer_prio(bh)) 3037 3042 rw |= REQ_PRIO; 3038 3043 3039 - bio_get(bio); 3040 3044 submit_bio(rw, bio); 3041 - 3042 - if (bio_flagged(bio, BIO_EOPNOTSUPP)) 3043 - ret = -EOPNOTSUPP; 3044 - 3045 - bio_put(bio); 3046 - return ret; 3045 + return 0; 3047 3046 } 3048 3047 EXPORT_SYMBOL_GPL(_submit_bh); 3049 3048

-1

fs/ext4/page-io.c

··· 359 359 if (bio) { 360 360 bio_get(io->io_bio); 361 361 submit_bio(io->io_op, io->io_bio); 362 - BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); 363 362 bio_put(io->io_bio); 364 363 } 365 364 io->io_bio = NULL;

-12

fs/nilfs2/segbuf.c

··· 343 343 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 344 344 struct nilfs_segment_buffer *segbuf = bio->bi_private; 345 345 346 - if (err == -EOPNOTSUPP) { 347 - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 348 - /* to be detected by nilfs_segbuf_submit_bio() */ 349 - } 350 - 351 346 if (!uptodate) 352 347 atomic_inc(&segbuf->sb_err); 353 348 ··· 369 374 370 375 bio->bi_end_io = nilfs_end_bio_write; 371 376 bio->bi_private = segbuf; 372 - bio_get(bio); 373 377 submit_bio(mode, bio); 374 378 segbuf->sb_nbio++; 375 - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 376 - bio_put(bio); 377 - err = -EOPNOTSUPP; 378 - goto failed; 379 - } 380 - bio_put(bio); 381 379 382 380 wi->bio = NULL; 383 381 wi->rest_blocks -= wi->end - wi->start;

-1

fs/xfs/xfs_aops.c

··· 356 356 { 357 357 xfs_ioend_t *ioend = bio->bi_private; 358 358 359 - ASSERT(atomic_read(&bio->bi_cnt) >= 1); 360 359 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; 361 360 362 361 /* Toss bio and pass work off to an xfsdatad thread */

+15 -2

include/linux/bio.h

··· 290 290 * returns. and then bio would be freed memory when if (bio->bi_flags ...) 291 291 * runs 292 292 */ 293 - #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) 293 + static inline void bio_get(struct bio *bio) 294 + { 295 + bio->bi_flags |= (1 << BIO_REFFED); 296 + smp_mb__before_atomic(); 297 + atomic_inc(&bio->__bi_cnt); 298 + } 299 + 300 + static inline void bio_cnt_set(struct bio *bio, unsigned int count) 301 + { 302 + if (count != 1) { 303 + bio->bi_flags |= (1 << BIO_REFFED); 304 + smp_mb__before_atomic(); 305 + } 306 + atomic_set(&bio->__bi_cnt, count); 307 + } 294 308 295 309 enum bip_flags { 296 310 BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ ··· 427 413 } 428 414 429 415 extern void bio_endio(struct bio *, int); 430 - extern void bio_endio_nodec(struct bio *, int); 431 416 struct request_queue; 432 417 extern int bio_phys_segments(struct request_queue *, struct bio *); 433 418

+4

include/linux/blk-mq.h

··· 96 96 97 97 typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, 98 98 bool); 99 + typedef void (busy_tag_iter_fn)(struct request *, void *, bool); 99 100 100 101 struct blk_mq_ops { 101 102 /* ··· 183 182 struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 184 183 gfp_t gfp, bool reserved); 185 184 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); 185 + struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags); 186 186 187 187 enum { 188 188 BLK_MQ_UNIQUE_TAG_BITS = 16, ··· 225 223 void blk_mq_run_hw_queues(struct request_queue *q, bool async); 226 224 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 227 225 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, 226 + void *priv); 227 + void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, 228 228 void *priv); 229 229 void blk_mq_freeze_queue(struct request_queue *q); 230 230 void blk_mq_unfreeze_queue(struct request_queue *q);

+13 -12

include/linux/blk_types.h

··· 65 65 unsigned int bi_seg_front_size; 66 66 unsigned int bi_seg_back_size; 67 67 68 - atomic_t bi_remaining; 68 + atomic_t __bi_remaining; 69 69 70 70 bio_end_io_t *bi_end_io; 71 71 ··· 92 92 93 93 unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ 94 94 95 - atomic_t bi_cnt; /* pin count */ 95 + atomic_t __bi_cnt; /* pin count */ 96 96 97 97 struct bio_vec *bi_io_vec; /* the actual vec list */ 98 98 ··· 112 112 * bio flags 113 113 */ 114 114 #define BIO_UPTODATE 0 /* ok after I/O completion */ 115 - #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ 116 - #define BIO_EOF 2 /* out-out-bounds error */ 117 - #define BIO_SEG_VALID 3 /* bi_phys_segments valid */ 118 - #define BIO_CLONED 4 /* doesn't own data */ 119 - #define BIO_BOUNCED 5 /* bio is a bounce bio */ 120 - #define BIO_USER_MAPPED 6 /* contains user pages */ 121 - #define BIO_EOPNOTSUPP 7 /* not supported */ 122 - #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ 123 - #define BIO_QUIET 9 /* Make BIO Quiet */ 124 - #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ 115 + #define BIO_SEG_VALID 1 /* bi_phys_segments valid */ 116 + #define BIO_CLONED 2 /* doesn't own data */ 117 + #define BIO_BOUNCED 3 /* bio is a bounce bio */ 118 + #define BIO_USER_MAPPED 4 /* contains user pages */ 119 + #define BIO_NULL_MAPPED 5 /* contains invalid user pages */ 120 + #define BIO_QUIET 6 /* Make BIO Quiet */ 121 + #define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ 122 + #define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */ 123 + #define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ 125 124 126 125 /* 127 126 * Flags starting here get preserved by bio_reset() - this includes ··· 192 193 __REQ_HASHED, /* on IO scheduler merge hash */ 193 194 __REQ_MQ_INFLIGHT, /* track inflight for MQ */ 194 195 __REQ_NO_TIMEOUT, /* requests may never expire */ 196 + __REQ_CLONE, /* cloned bios */ 195 197 __REQ_NR_BITS, /* stops here */ 196 198 }; 197 199 ··· 247 247 #define REQ_HASHED (1ULL << __REQ_HASHED) 248 248 #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) 249 249 #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) 250 + #define REQ_CLONE (1ULL << __REQ_CLONE) 250 251 251 252 #endif /* __LINUX_BLK_TYPES_H */

+7 -38

include/linux/blkdev.h

··· 30 30 31 31 struct request_queue; 32 32 struct elevator_queue; 33 - struct request_pm_state; 34 33 struct blk_trace; 35 34 struct request; 36 35 struct sg_io_hdr; ··· 74 75 enum rq_cmd_type_bits { 75 76 REQ_TYPE_FS = 1, /* fs request */ 76 77 REQ_TYPE_BLOCK_PC, /* scsi command */ 77 - REQ_TYPE_SENSE, /* sense request */ 78 - REQ_TYPE_PM_SUSPEND, /* suspend request */ 79 - REQ_TYPE_PM_RESUME, /* resume request */ 80 - REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 81 - REQ_TYPE_SPECIAL, /* driver defined type */ 82 - /* 83 - * for ATA/ATAPI devices. this really doesn't belong here, ide should 84 - * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 85 - * private REQ_LB opcodes to differentiate what type of request this is 86 - */ 87 - REQ_TYPE_ATA_TASKFILE, 88 - REQ_TYPE_ATA_PC, 78 + REQ_TYPE_DRV_PRIV, /* driver defined types from here */ 89 79 }; 90 80 91 81 #define BLK_MAX_CDB 16 ··· 96 108 struct blk_mq_ctx *mq_ctx; 97 109 98 110 u64 cmd_flags; 99 - enum rq_cmd_type_bits cmd_type; 111 + unsigned cmd_type; 100 112 unsigned long atomic_flags; 101 113 102 114 int cpu; ··· 203 215 { 204 216 return req->ioprio; 205 217 } 206 - 207 - /* 208 - * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 209 - * requests. Some step values could eventually be made generic. 210 - */ 211 - struct request_pm_state 212 - { 213 - /* PM state machine step value, currently driver specific */ 214 - int pm_step; 215 - /* requested PM state value (S1, S2, S3, S4, ...) */ 216 - u32 pm_state; 217 - void* data; /* for driver use */ 218 - }; 219 218 220 219 #include <linux/elevator.h> 221 220 ··· 444 469 struct mutex sysfs_lock; 445 470 446 471 int bypass_depth; 447 - int mq_freeze_depth; 472 + atomic_t mq_freeze_depth; 448 473 449 474 #if defined(CONFIG_BLK_DEV_BSG) 450 475 bsg_job_fn *bsg_job_fn; ··· 584 609 #define blk_account_rq(rq) \ 585 610 (((rq)->cmd_flags & REQ_STARTED) && \ 586 611 ((rq)->cmd_type == REQ_TYPE_FS)) 587 - 588 - #define blk_pm_request(rq) \ 589 - ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ 590 - (rq)->cmd_type == REQ_TYPE_PM_RESUME) 591 612 592 613 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 593 614 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) ··· 775 804 unsigned int len); 776 805 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); 777 806 extern int blk_lld_busy(struct request_queue *q); 778 - extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 779 - struct bio_set *bs, gfp_t gfp_mask, 780 - int (*bio_ctr)(struct bio *, struct bio *, void *), 781 - void *data); 782 - extern void blk_rq_unprep_clone(struct request *rq); 807 + extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src); 783 808 extern int blk_insert_cloned_request(struct request_queue *q, 784 809 struct request *rq); 785 810 extern void blk_delay_queue(struct request_queue *, unsigned long); ··· 812 845 extern void blk_sync_queue(struct request_queue *q); 813 846 extern void __blk_stop_queue(struct request_queue *q); 814 847 extern void __blk_run_queue(struct request_queue *q); 848 + extern void __blk_run_queue_uncond(struct request_queue *q); 815 849 extern void blk_run_queue(struct request_queue *); 816 850 extern void blk_run_queue_async(struct request_queue *q); 817 851 extern int blk_rq_map_user(struct request_queue *, struct request *, ··· 901 933 if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) 902 934 return q->limits.max_hw_sectors; 903 935 904 - if (!q->limits.chunk_sectors) 936 + if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) 905 937 return blk_queue_get_max_sectors(q, rq->cmd_flags); 906 938 907 939 return min(blk_max_size_offset(q, blk_rq_pos(rq)), ··· 1022 1054 struct request_queue *blk_alloc_queue(gfp_t); 1023 1055 struct request_queue *blk_alloc_queue_node(gfp_t, int); 1024 1056 extern void blk_put_queue(struct request_queue *); 1057 + extern void blk_set_queue_dying(struct request_queue *); 1025 1058 1026 1059 /* 1027 1060 * block layer runtime pm functions

+2

include/linux/elevator.h

··· 39 39 typedef int (elevator_init_fn) (struct request_queue *, 40 40 struct elevator_type *e); 41 41 typedef void (elevator_exit_fn) (struct elevator_queue *); 42 + typedef void (elevator_registered_fn) (struct request_queue *); 42 43 43 44 struct elevator_ops 44 45 { ··· 69 68 70 69 elevator_init_fn *elevator_init_fn; 71 70 elevator_exit_fn *elevator_exit_fn; 71 + elevator_registered_fn *elevator_registered_fn; 72 72 }; 73 73 74 74 #define ELV_NAME_MAX (16)

+3

include/linux/fs.h

··· 2280 2280 extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, 2281 2281 void *holder); 2282 2282 extern void blkdev_put(struct block_device *bdev, fmode_t mode); 2283 + extern int __blkdev_reread_part(struct block_device *bdev); 2284 + extern int blkdev_reread_part(struct block_device *bdev); 2285 + 2283 2286 #ifdef CONFIG_SYSFS 2284 2287 extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); 2285 2288 extern void bd_unlink_disk_holder(struct block_device *bdev,

+27

include/linux/ide.h

··· 39 39 40 40 struct device; 41 41 42 + /* IDE-specific values for req->cmd_type */ 43 + enum ata_cmd_type_bits { 44 + REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, 45 + REQ_TYPE_ATA_PC, 46 + REQ_TYPE_ATA_SENSE, /* sense request */ 47 + REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ 48 + REQ_TYPE_ATA_PM_RESUME, /* resume request */ 49 + }; 50 + 51 + #define ata_pm_request(rq) \ 52 + ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ 53 + (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) 54 + 42 55 /* Error codes returned in rq->errors to the higher part of the driver. */ 43 56 enum { 44 57 IDE_DRV_ERROR_GENERAL = 101, ··· 1327 1314 u8 udma_mask; 1328 1315 }; 1329 1316 1317 + /* 1318 + * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME 1319 + * requests. 1320 + */ 1321 + struct ide_pm_state { 1322 + /* PM state machine step value, currently driver specific */ 1323 + int pm_step; 1324 + /* requested PM state value (S1, S2, S3, S4, ...) */ 1325 + u32 pm_state; 1326 + void* data; /* for driver use */ 1327 + }; 1328 + 1329 + 1330 1330 int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *); 1331 1331 int ide_pci_init_two(struct pci_dev *, struct pci_dev *, 1332 1332 const struct ide_port_info *, void *); ··· 1576 1550 1577 1551 #define ide_host_for_each_port(i, port, host) \ 1578 1552 for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) 1553 + 1579 1554 1580 1555 #endif /* _IDE_H */

-1

include/linux/swap.h

··· 377 377 extern int __swap_writepage(struct page *page, struct writeback_control *wbc, 378 378 void (*end_write_func)(struct bio *, int)); 379 379 extern int swap_set_page_dirty(struct page *page); 380 - extern void end_swap_bio_read(struct bio *bio, int err); 381 380 382 381 int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, 383 382 unsigned long nr_pages, sector_t start_block);

-2

include/uapi/linux/nbd.h

··· 44 44 /* there is a gap here to match userspace */ 45 45 #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ 46 46 47 - #define nbd_cmd(req) ((req)->cmd[0]) 48 - 49 47 /* userspace doesn't need the nbd_device structure */ 50 48 51 49 /* These are sent over the network in the request/reply magic fields */

+1 -2

kernel/power/Makefile

··· 7 7 obj-$(CONFIG_FREEZER) += process.o 8 8 obj-$(CONFIG_SUSPEND) += suspend.o 9 9 obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 10 - obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ 11 - block_io.o 10 + obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o 12 11 obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o 13 12 obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o 14 13

-103

kernel/power/block_io.c

··· 1 - /* 2 - * This file provides functions for block I/O operations on swap/file. 3 - * 4 - * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> 5 - * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 6 - * 7 - * This file is released under the GPLv2. 8 - */ 9 - 10 - #include <linux/bio.h> 11 - #include <linux/kernel.h> 12 - #include <linux/pagemap.h> 13 - #include <linux/swap.h> 14 - 15 - #include "power.h" 16 - 17 - /** 18 - * submit - submit BIO request. 19 - * @rw: READ or WRITE. 20 - * @off physical offset of page. 21 - * @page: page we're reading or writing. 22 - * @bio_chain: list of pending biod (for async reading) 23 - * 24 - * Straight from the textbook - allocate and initialize the bio. 25 - * If we're reading, make sure the page is marked as dirty. 26 - * Then submit it and, if @bio_chain == NULL, wait. 27 - */ 28 - static int submit(int rw, struct block_device *bdev, sector_t sector, 29 - struct page *page, struct bio **bio_chain) 30 - { 31 - const int bio_rw = rw | REQ_SYNC; 32 - struct bio *bio; 33 - 34 - bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 35 - bio->bi_iter.bi_sector = sector; 36 - bio->bi_bdev = bdev; 37 - bio->bi_end_io = end_swap_bio_read; 38 - 39 - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 40 - printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", 41 - (unsigned long long)sector); 42 - bio_put(bio); 43 - return -EFAULT; 44 - } 45 - 46 - lock_page(page); 47 - bio_get(bio); 48 - 49 - if (bio_chain == NULL) { 50 - submit_bio(bio_rw, bio); 51 - wait_on_page_locked(page); 52 - if (rw == READ) 53 - bio_set_pages_dirty(bio); 54 - bio_put(bio); 55 - } else { 56 - if (rw == READ) 57 - get_page(page); /* These pages are freed later */ 58 - bio->bi_private = *bio_chain; 59 - *bio_chain = bio; 60 - submit_bio(bio_rw, bio); 61 - } 62 - return 0; 63 - } 64 - 65 - int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) 66 - { 67 - return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), 68 - virt_to_page(addr), bio_chain); 69 - } 70 - 71 - int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) 72 - { 73 - return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), 74 - virt_to_page(addr), bio_chain); 75 - } 76 - 77 - int hib_wait_on_bio_chain(struct bio **bio_chain) 78 - { 79 - struct bio *bio; 80 - struct bio *next_bio; 81 - int ret = 0; 82 - 83 - if (bio_chain == NULL) 84 - return 0; 85 - 86 - bio = *bio_chain; 87 - if (bio == NULL) 88 - return 0; 89 - while (bio) { 90 - struct page *page; 91 - 92 - next_bio = bio->bi_private; 93 - page = bio->bi_io_vec[0].bv_page; 94 - wait_on_page_locked(page); 95 - if (!PageUptodate(page) || PageError(page)) 96 - ret = -EIO; 97 - put_page(page); 98 - bio_put(bio); 99 - bio = next_bio; 100 - } 101 - *bio_chain = NULL; 102 - return ret; 103 - }

-9

kernel/power/power.h

··· 163 163 extern int swsusp_unmark(void); 164 164 #endif 165 165 166 - /* kernel/power/block_io.c */ 167 - extern struct block_device *hib_resume_bdev; 168 - 169 - extern int hib_bio_read_page(pgoff_t page_off, void *addr, 170 - struct bio **bio_chain); 171 - extern int hib_bio_write_page(pgoff_t page_off, void *addr, 172 - struct bio **bio_chain); 173 - extern int hib_wait_on_bio_chain(struct bio **bio_chain); 174 - 175 166 struct timeval; 176 167 /* kernel/power/swsusp.c */ 177 168 extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *);

+120 -39

kernel/power/swap.c

··· 212 212 */ 213 213 214 214 static unsigned short root_swap = 0xffff; 215 - struct block_device *hib_resume_bdev; 215 + static struct block_device *hib_resume_bdev; 216 + 217 + struct hib_bio_batch { 218 + atomic_t count; 219 + wait_queue_head_t wait; 220 + int error; 221 + }; 222 + 223 + static void hib_init_batch(struct hib_bio_batch *hb) 224 + { 225 + atomic_set(&hb->count, 0); 226 + init_waitqueue_head(&hb->wait); 227 + hb->error = 0; 228 + } 229 + 230 + static void hib_end_io(struct bio *bio, int error) 231 + { 232 + struct hib_bio_batch *hb = bio->bi_private; 233 + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 234 + struct page *page = bio->bi_io_vec[0].bv_page; 235 + 236 + if (!uptodate || error) { 237 + printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", 238 + imajor(bio->bi_bdev->bd_inode), 239 + iminor(bio->bi_bdev->bd_inode), 240 + (unsigned long long)bio->bi_iter.bi_sector); 241 + 242 + if (!error) 243 + error = -EIO; 244 + } 245 + 246 + if (bio_data_dir(bio) == WRITE) 247 + put_page(page); 248 + 249 + if (error && !hb->error) 250 + hb->error = error; 251 + if (atomic_dec_and_test(&hb->count)) 252 + wake_up(&hb->wait); 253 + 254 + bio_put(bio); 255 + } 256 + 257 + static int hib_submit_io(int rw, pgoff_t page_off, void *addr, 258 + struct hib_bio_batch *hb) 259 + { 260 + struct page *page = virt_to_page(addr); 261 + struct bio *bio; 262 + int error = 0; 263 + 264 + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 265 + bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); 266 + bio->bi_bdev = hib_resume_bdev; 267 + 268 + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 269 + printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", 270 + (unsigned long long)bio->bi_iter.bi_sector); 271 + bio_put(bio); 272 + return -EFAULT; 273 + } 274 + 275 + if (hb) { 276 + bio->bi_end_io = hib_end_io; 277 + bio->bi_private = hb; 278 + atomic_inc(&hb->count); 279 + submit_bio(rw, bio); 280 + } else { 281 + error = submit_bio_wait(rw, bio); 282 + bio_put(bio); 283 + } 284 + 285 + return error; 286 + } 287 + 288 + static int hib_wait_io(struct hib_bio_batch *hb) 289 + { 290 + wait_event(hb->wait, atomic_read(&hb->count) == 0); 291 + return hb->error; 292 + } 216 293 217 294 /* 218 295 * Saving part ··· 299 222 { 300 223 int error; 301 224 302 - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); 225 + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); 303 226 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || 304 227 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { 305 228 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); ··· 308 231 swsusp_header->flags = flags; 309 232 if (flags & SF_CRC32_MODE) 310 233 swsusp_header->crc32 = handle->crc32; 311 - error = hib_bio_write_page(swsusp_resume_block, 234 + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, 312 235 swsusp_header, NULL); 313 236 } else { 314 237 printk(KERN_ERR "PM: Swap header not found!\n"); ··· 348 271 * write_page - Write one page to given swap location. 349 272 * @buf: Address we're writing. 350 273 * @offset: Offset of the swap page we're writing to. 351 - * @bio_chain: Link the next write BIO here 274 + * @hb: bio completion batch 352 275 */ 353 276 354 - static int write_page(void *buf, sector_t offset, struct bio **bio_chain) 277 + static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) 355 278 { 356 279 void *src; 357 280 int ret; ··· 359 282 if (!offset) 360 283 return -ENOSPC; 361 284 362 - if (bio_chain) { 285 + if (hb) { 363 286 src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | 364 287 __GFP_NORETRY); 365 288 if (src) { 366 289 copy_page(src, buf); 367 290 } else { 368 - ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ 291 + ret = hib_wait_io(hb); /* Free pages */ 369 292 if (ret) 370 293 return ret; 371 294 src = (void *)__get_free_page(__GFP_WAIT | ··· 375 298 copy_page(src, buf); 376 299 } else { 377 300 WARN_ON_ONCE(1); 378 - bio_chain = NULL; /* Go synchronous */ 301 + hb = NULL; /* Go synchronous */ 379 302 src = buf; 380 303 } 381 304 } 382 305 } else { 383 306 src = buf; 384 307 } 385 - return hib_bio_write_page(offset, src, bio_chain); 308 + return hib_submit_io(WRITE_SYNC, offset, src, hb); 386 309 } 387 310 388 311 static void release_swap_writer(struct swap_map_handle *handle) ··· 425 348 } 426 349 427 350 static int swap_write_page(struct swap_map_handle *handle, void *buf, 428 - struct bio **bio_chain) 351 + struct hib_bio_batch *hb) 429 352 { 430 353 int error = 0; 431 354 sector_t offset; ··· 433 356 if (!handle->cur) 434 357 return -EINVAL; 435 358 offset = alloc_swapdev_block(root_swap); 436 - error = write_page(buf, offset, bio_chain); 359 + error = write_page(buf, offset, hb); 437 360 if (error) 438 361 return error; 439 362 handle->cur->entries[handle->k++] = offset; ··· 442 365 if (!offset) 443 366 return -ENOSPC; 444 367 handle->cur->next_swap = offset; 445 - error = write_page(handle->cur, handle->cur_swap, bio_chain); 368 + error = write_page(handle->cur, handle->cur_swap, hb); 446 369 if (error) 447 370 goto out; 448 371 clear_page(handle->cur); 449 372 handle->cur_swap = offset; 450 373 handle->k = 0; 451 374 452 - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { 453 - error = hib_wait_on_bio_chain(bio_chain); 375 + if (hb && low_free_pages() <= handle->reqd_free_pages) { 376 + error = hib_wait_io(hb); 454 377 if (error) 455 378 goto out; 456 379 /* ··· 522 445 int ret; 523 446 int nr_pages; 524 447 int err2; 525 - struct bio *bio; 448 + struct hib_bio_batch hb; 526 449 ktime_t start; 527 450 ktime_t stop; 451 + 452 + hib_init_batch(&hb); 528 453 529 454 printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", 530 455 nr_to_write); ··· 534 455 if (!m) 535 456 m = 1; 536 457 nr_pages = 0; 537 - bio = NULL; 538 458 start = ktime_get(); 539 459 while (1) { 540 460 ret = snapshot_read_next(snapshot); 541 461 if (ret <= 0) 542 462 break; 543 - ret = swap_write_page(handle, data_of(*snapshot), &bio); 463 + ret = swap_write_page(handle, data_of(*snapshot), &hb); 544 464 if (ret) 545 465 break; 546 466 if (!(nr_pages % m)) ··· 547 469 nr_pages / m * 10); 548 470 nr_pages++; 549 471 } 550 - err2 = hib_wait_on_bio_chain(&bio); 472 + err2 = hib_wait_io(&hb); 551 473 stop = ktime_get(); 552 474 if (!ret) 553 475 ret = err2; ··· 658 580 int ret = 0; 659 581 int nr_pages; 660 582 int err2; 661 - struct bio *bio; 583 + struct hib_bio_batch hb; 662 584 ktime_t start; 663 585 ktime_t stop; 664 586 size_t off; ··· 666 588 unsigned char *page = NULL; 667 589 struct cmp_data *data = NULL; 668 590 struct crc_data *crc = NULL; 591 + 592 + hib_init_batch(&hb); 669 593 670 594 /* 671 595 * We'll limit the number of threads for compression to limit memory ··· 754 674 if (!m) 755 675 m = 1; 756 676 nr_pages = 0; 757 - bio = NULL; 758 677 start = ktime_get(); 759 678 for (;;) { 760 679 for (thr = 0; thr < nr_threads; thr++) { ··· 827 748 off += PAGE_SIZE) { 828 749 memcpy(page, data[thr].cmp + off, PAGE_SIZE); 829 750 830 - ret = swap_write_page(handle, page, &bio); 751 + ret = swap_write_page(handle, page, &hb); 831 752 if (ret) 832 753 goto out_finish; 833 754 } ··· 838 759 } 839 760 840 761 out_finish: 841 - err2 = hib_wait_on_bio_chain(&bio); 762 + err2 = hib_wait_io(&hb); 842 763 stop = ktime_get(); 843 764 if (!ret) 844 765 ret = err2; ··· 985 906 return -ENOMEM; 986 907 } 987 908 988 - error = hib_bio_read_page(offset, tmp->map, NULL); 909 + error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL); 989 910 if (error) { 990 911 release_swap_reader(handle); 991 912 return error; ··· 998 919 } 999 920 1000 921 static int swap_read_page(struct swap_map_handle *handle, void *buf, 1001 - struct bio **bio_chain) 922 + struct hib_bio_batch *hb) 1002 923 { 1003 924 sector_t offset; 1004 925 int error; ··· 1009 930 offset = handle->cur->entries[handle->k]; 1010 931 if (!offset) 1011 932 return -EFAULT; 1012 - error = hib_bio_read_page(offset, buf, bio_chain); 933 + error = hib_submit_io(READ_SYNC, offset, buf, hb); 1013 934 if (error) 1014 935 return error; 1015 936 if (++handle->k >= MAP_PAGE_ENTRIES) { ··· 1047 968 int ret = 0; 1048 969 ktime_t start; 1049 970 ktime_t stop; 1050 - struct bio *bio; 971 + struct hib_bio_batch hb; 1051 972 int err2; 1052 973 unsigned nr_pages; 974 + 975 + hib_init_batch(&hb); 1053 976 1054 977 printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", 1055 978 nr_to_read); ··· 1059 978 if (!m) 1060 979 m = 1; 1061 980 nr_pages = 0; 1062 - bio = NULL; 1063 981 start = ktime_get(); 1064 982 for ( ; ; ) { 1065 983 ret = snapshot_write_next(snapshot); 1066 984 if (ret <= 0) 1067 985 break; 1068 - ret = swap_read_page(handle, data_of(*snapshot), &bio); 986 + ret = swap_read_page(handle, data_of(*snapshot), &hb); 1069 987 if (ret) 1070 988 break; 1071 989 if (snapshot->sync_read) 1072 - ret = hib_wait_on_bio_chain(&bio); 990 + ret = hib_wait_io(&hb); 1073 991 if (ret) 1074 992 break; 1075 993 if (!(nr_pages % m)) ··· 1076 996 nr_pages / m * 10); 1077 997 nr_pages++; 1078 998 } 1079 - err2 = hib_wait_on_bio_chain(&bio); 999 + err2 = hib_wait_io(&hb); 1080 1000 stop = ktime_get(); 1081 1001 if (!ret) 1082 1002 ret = err2; ··· 1147 1067 unsigned int m; 1148 1068 int ret = 0; 1149 1069 int eof = 0; 1150 - struct bio *bio; 1070 + struct hib_bio_batch hb; 1151 1071 ktime_t start; 1152 1072 ktime_t stop; 1153 1073 unsigned nr_pages; ··· 1159 1079 unsigned char **page = NULL; 1160 1080 struct dec_data *data = NULL; 1161 1081 struct crc_data *crc = NULL; 1082 + 1083 + hib_init_batch(&hb); 1162 1084 1163 1085 /* 1164 1086 * We'll limit the number of threads for decompression to limit memory ··· 1272 1190 if (!m) 1273 1191 m = 1; 1274 1192 nr_pages = 0; 1275 - bio = NULL; 1276 1193 start = ktime_get(); 1277 1194 1278 1195 ret = snapshot_write_next(snapshot); ··· 1280 1199 1281 1200 for(;;) { 1282 1201 for (i = 0; !eof && i < want; i++) { 1283 - ret = swap_read_page(handle, page[ring], &bio); 1202 + ret = swap_read_page(handle, page[ring], &hb); 1284 1203 if (ret) { 1285 1204 /* 1286 1205 * On real read error, finish. On end of data, ··· 1307 1226 if (!asked) 1308 1227 break; 1309 1228 1310 - ret = hib_wait_on_bio_chain(&bio); 1229 + ret = hib_wait_io(&hb); 1311 1230 if (ret) 1312 1231 goto out_finish; 1313 1232 have += asked; ··· 1362 1281 * Wait for more data while we are decompressing. 1363 1282 */ 1364 1283 if (have < LZO_CMP_PAGES && asked) { 1365 - ret = hib_wait_on_bio_chain(&bio); 1284 + ret = hib_wait_io(&hb); 1366 1285 if (ret) 1367 1286 goto out_finish; 1368 1287 have += asked; ··· 1511 1430 if (!IS_ERR(hib_resume_bdev)) { 1512 1431 set_blocksize(hib_resume_bdev, PAGE_SIZE); 1513 1432 clear_page(swsusp_header); 1514 - error = hib_bio_read_page(swsusp_resume_block, 1433 + error = hib_submit_io(READ_SYNC, swsusp_resume_block, 1515 1434 swsusp_header, NULL); 1516 1435 if (error) 1517 1436 goto put; ··· 1519 1438 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { 1520 1439 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); 1521 1440 /* Reset swap signature now */ 1522 - error = hib_bio_write_page(swsusp_resume_block, 1441 + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, 1523 1442 swsusp_header, NULL); 1524 1443 } else { 1525 1444 error = -EINVAL; ··· 1563 1482 { 1564 1483 int error; 1565 1484 1566 - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); 1485 + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); 1567 1486 if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { 1568 1487 memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); 1569 - error = hib_bio_write_page(swsusp_resume_block, 1488 + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, 1570 1489 swsusp_header, NULL); 1571 1490 } else { 1572 1491 printk(KERN_ERR "PM: Cannot find swsusp signature!\n");

+1 -1

mm/page_io.c

··· 69 69 bio_put(bio); 70 70 } 71 71 72 - void end_swap_bio_read(struct bio *bio, int err) 72 + static void end_swap_bio_read(struct bio *bio, int err) 73 73 { 74 74 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 75 75 struct page *page = bio->bi_io_vec[0].bv_page;