Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: drop request->hard_* and *nr_sectors

struct request has had a few different ways to represent some
properties of a request. ->hard_* represent block layer's view of the
request progress (completion cursor) and the ones without the prefix
are supposed to represent the issue cursor and allowed to be updated
as necessary by the low level drivers. The thing is that as block
layer supports partial completion, the two cursors really aren't
necessary and only cause confusion. In addition, manual management of
request detail from low level drivers is cumbersome and error-prone at
the very least.

Other interesting duplicate fields are rq->[hard_]nr_sectors and
rq->{hard_cur|current}_nr_sectors against rq->data_len and
rq->bio->bi_size. This is more convoluted than the hard_ case.

rq->[hard_]nr_sectors are initialized for requests with bio but
blk_rq_bytes() uses it only for !pc requests. rq->data_len is
initialized for all requests but blk_rq_bytes() uses it only for pc
requests. This causes good amount of confusion throughout block layer
and its drivers and determining the request length has been a bit of
black magic which may or may not work depending on circumstances and
what the specific LLD is actually doing.

rq->{hard_cur|current}_nr_sectors represent the number of sectors in
the contiguous data area at the front. This is mainly used by drivers
which transfer data by walking the request segment-by-segment. This
value always equals rq->bio->bi_size >> 9. However, data length for
pc requests may not be multiple of 512 bytes and using this field
becomes a bit confusing.

In general, having multiple fields to represent the same property
leads only to confusion and subtle bugs. With recent block low level
driver cleanups, no driver is accessing or manipulating these
duplicate fields directly. Drop all the duplicates. Now rq->sector
means the current sector, rq->data_len the current total length and
rq->bio->bi_size the current segment length. Everything else is
defined in terms of these three and available only through accessors.

* blk_recalc_rq_sectors() is collapsed into blk_update_request() and
now handles pc and fs requests equally other than rq->sector update.
This means that now pc requests can use partial completion too (no
in-kernel user yet, though).

* bio_cur_sectors() is replaced with bio_cur_bytes() as block layer
now uses byte count as the primary data length.

* blk_rq_pos() is now guaranteed to be always correct. In-block users
converted.

* blk_rq_bytes() is now guaranteed to be always valid as is
blk_rq_sectors(). In-block users converted.

* blk_rq_sectors() is now guaranteed to equal blk_rq_bytes() >> 9.
The more convenient one is used.

* blk_rq_bytes() and blk_rq_cur_bytes() are now inlined and take const
pointer to request.

[ Impact: API cleanup, single way to represent one property of a request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

authored by

Tejun Heo and committed by
Jens Axboe
2e46e8b2 9780e2dd

+67 -122
+30 -51
block/blk-core.c
··· 127 127 INIT_LIST_HEAD(&rq->timeout_list); 128 128 rq->cpu = -1; 129 129 rq->q = q; 130 - rq->sector = rq->hard_sector = (sector_t) -1; 130 + rq->sector = (sector_t) -1; 131 131 INIT_HLIST_NODE(&rq->hash); 132 132 RB_CLEAR_NODE(&rq->rb_node); 133 133 rq->cmd = rq->__cmd; ··· 189 189 (unsigned long long)blk_rq_pos(rq), 190 190 blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); 191 191 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", 192 - rq->bio, rq->biotail, 193 - rq->buffer, rq->data_len); 192 + rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); 194 193 195 194 if (blk_pc_request(rq)) { 196 195 printk(KERN_INFO " cdb: "); ··· 1095 1096 req->cmd_flags |= REQ_NOIDLE; 1096 1097 1097 1098 req->errors = 0; 1098 - req->hard_sector = req->sector = bio->bi_sector; 1099 + req->sector = bio->bi_sector; 1099 1100 req->ioprio = bio_prio(bio); 1100 1101 blk_rq_bio_prep(req->q, req, bio); 1101 1102 } ··· 1112 1113 static int __make_request(struct request_queue *q, struct bio *bio) 1113 1114 { 1114 1115 struct request *req; 1115 - int el_ret, nr_sectors; 1116 + int el_ret; 1117 + unsigned int bytes = bio->bi_size; 1116 1118 const unsigned short prio = bio_prio(bio); 1117 1119 const int sync = bio_sync(bio); 1118 1120 const int unplug = bio_unplug(bio); 1119 1121 int rw_flags; 1120 - 1121 - nr_sectors = bio_sectors(bio); 1122 1122 1123 1123 /* 1124 1124 * low level driver can indicate that it wants pages above a ··· 1143 1145 1144 1146 req->biotail->bi_next = bio; 1145 1147 req->biotail = bio; 1146 - req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1148 + req->data_len += bytes; 1147 1149 req->ioprio = ioprio_best(req->ioprio, prio); 1148 1150 if (!blk_rq_cpu_valid(req)) 1149 1151 req->cpu = bio->bi_comp_cpu; ··· 1169 1171 * not touch req->buffer either... 
1170 1172 */ 1171 1173 req->buffer = bio_data(bio); 1172 - req->current_nr_sectors = bio_cur_sectors(bio); 1173 - req->hard_cur_sectors = req->current_nr_sectors; 1174 - req->sector = req->hard_sector = bio->bi_sector; 1175 - req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1174 + req->sector = bio->bi_sector; 1175 + req->data_len += bytes; 1176 1176 req->ioprio = ioprio_best(req->ioprio, prio); 1177 1177 if (!blk_rq_cpu_valid(req)) 1178 1178 req->cpu = bio->bi_comp_cpu; ··· 1553 1557 int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1554 1558 { 1555 1559 if (blk_rq_sectors(rq) > q->max_sectors || 1556 - rq->data_len > q->max_hw_sectors << 9) { 1560 + blk_rq_bytes(rq) > q->max_hw_sectors << 9) { 1557 1561 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1558 1562 return -EIO; 1559 1563 } ··· 1671 1675 } 1672 1676 } 1673 1677 1674 - /** 1675 - * blk_rq_bytes - Returns bytes left to complete in the entire request 1676 - * @rq: the request being processed 1677 - **/ 1678 - unsigned int blk_rq_bytes(struct request *rq) 1679 - { 1680 - if (blk_fs_request(rq)) 1681 - return blk_rq_sectors(rq) << 9; 1682 - 1683 - return rq->data_len; 1684 - } 1685 - EXPORT_SYMBOL_GPL(blk_rq_bytes); 1686 - 1687 - /** 1688 - * blk_rq_cur_bytes - Returns bytes left to complete in the current segment 1689 - * @rq: the request being processed 1690 - **/ 1691 - unsigned int blk_rq_cur_bytes(struct request *rq) 1692 - { 1693 - if (blk_fs_request(rq)) 1694 - return rq->current_nr_sectors << 9; 1695 - 1696 - if (rq->bio) 1697 - return rq->bio->bi_size; 1698 - 1699 - return rq->data_len; 1700 - } 1701 - EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); 1702 - 1703 1678 struct request *elv_next_request(struct request_queue *q) 1704 1679 { 1705 1680 struct request *rq; ··· 1703 1736 if (rq->cmd_flags & REQ_DONTPREP) 1704 1737 break; 1705 1738 1706 - if (q->dma_drain_size && rq->data_len) { 1739 + if (q->dma_drain_size && blk_rq_bytes(rq)) { 1707 1740 /* 1708 1741 * make sure space 
for the drain appears we 1709 1742 * know we can do this because max_hw_segments ··· 1726 1759 * avoid resource deadlock. REQ_STARTED will 1727 1760 * prevent other fs requests from passing this one. 1728 1761 */ 1729 - if (q->dma_drain_size && rq->data_len && 1762 + if (q->dma_drain_size && blk_rq_bytes(rq) && 1730 1763 !(rq->cmd_flags & REQ_DONTPREP)) { 1731 1764 /* 1732 1765 * remove the space for the drain we added ··· 1878 1911 * can find how many bytes remain in the request 1879 1912 * later. 1880 1913 */ 1881 - req->nr_sectors = req->hard_nr_sectors = 0; 1882 - req->current_nr_sectors = req->hard_cur_sectors = 0; 1914 + req->data_len = 0; 1883 1915 return false; 1884 1916 } 1885 1917 ··· 1892 1926 bio_iovec(bio)->bv_len -= nr_bytes; 1893 1927 } 1894 1928 1895 - blk_recalc_rq_sectors(req, total_bytes >> 9); 1929 + req->data_len -= total_bytes; 1930 + req->buffer = bio_data(req->bio); 1931 + 1932 + /* update sector only for requests with clear definition of sector */ 1933 + if (blk_fs_request(req) || blk_discard_rq(req)) 1934 + req->sector += total_bytes >> 9; 1935 + 1936 + /* 1937 + * If total number of sectors is less than the first segment 1938 + * size, something has gone terribly wrong. 1939 + */ 1940 + if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { 1941 + printk(KERN_ERR "blk: request botched\n"); 1942 + req->data_len = blk_rq_cur_bytes(req); 1943 + } 1944 + 1945 + /* recalculate the number of segments */ 1896 1946 blk_recalc_rq_segments(req); 1947 + 1897 1948 return true; 1898 1949 } 1899 1950 EXPORT_SYMBOL_GPL(blk_update_request); ··· 2032 2049 rq->nr_phys_segments = bio_phys_segments(q, bio); 2033 2050 rq->buffer = bio_data(bio); 2034 2051 } 2035 - rq->current_nr_sectors = bio_cur_sectors(bio); 2036 - rq->hard_cur_sectors = rq->current_nr_sectors; 2037 - rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 2038 2052 rq->data_len = bio->bi_size; 2039 - 2040 2053 rq->bio = rq->biotail = bio; 2041 2054 2042 2055 if (bio->bi_bdev)
+4 -32
block/blk-merge.c
··· 9 9 10 10 #include "blk.h" 11 11 12 - void blk_recalc_rq_sectors(struct request *rq, int nsect) 13 - { 14 - if (blk_fs_request(rq) || blk_discard_rq(rq)) { 15 - rq->hard_sector += nsect; 16 - rq->hard_nr_sectors -= nsect; 17 - 18 - /* 19 - * Move the I/O submission pointers ahead if required. 20 - */ 21 - if ((rq->nr_sectors >= rq->hard_nr_sectors) && 22 - (rq->sector <= rq->hard_sector)) { 23 - rq->sector = rq->hard_sector; 24 - rq->nr_sectors = rq->hard_nr_sectors; 25 - rq->hard_cur_sectors = bio_cur_sectors(rq->bio); 26 - rq->current_nr_sectors = rq->hard_cur_sectors; 27 - rq->buffer = bio_data(rq->bio); 28 - } 29 - 30 - /* 31 - * if total number of sectors is less than the first segment 32 - * size, something has gone terribly wrong 33 - */ 34 - if (rq->nr_sectors < rq->current_nr_sectors) { 35 - printk(KERN_ERR "blk: request botched\n"); 36 - rq->nr_sectors = rq->current_nr_sectors; 37 - } 38 - } 39 - } 40 - 41 12 static unsigned int __blk_recalc_rq_segments(struct request_queue *q, 42 13 struct bio *bio) 43 14 { ··· 170 199 171 200 172 201 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 173 - (rq->data_len & q->dma_pad_mask)) { 174 - unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1; 202 + (blk_rq_bytes(rq) & q->dma_pad_mask)) { 203 + unsigned int pad_len = 204 + (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; 175 205 176 206 sg->length += pad_len; 177 207 rq->extra_len += pad_len; ··· 370 398 req->biotail->bi_next = next->bio; 371 399 req->biotail = next->biotail; 372 400 373 - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; 401 + req->data_len += blk_rq_bytes(next); 374 402 375 403 elv_merge_requests(q, req, next); 376 404
-1
block/blk.h
··· 101 101 int attempt_back_merge(struct request_queue *q, struct request *rq); 102 102 int attempt_front_merge(struct request_queue *q, struct request *rq); 103 103 void blk_recalc_rq_segments(struct request *rq); 104 - void blk_recalc_rq_sectors(struct request *rq, int nsect); 105 104 106 105 void blk_queue_congestion_threshold(struct request_queue *q); 107 106
+5 -5
block/cfq-iosched.c
··· 579 579 * Sort strictly based on sector. Smallest to the left, 580 580 * largest to the right. 581 581 */ 582 - if (sector > cfqq->next_rq->sector) 582 + if (sector > blk_rq_pos(cfqq->next_rq)) 583 583 n = &(*p)->rb_right; 584 - else if (sector < cfqq->next_rq->sector) 584 + else if (sector < blk_rq_pos(cfqq->next_rq)) 585 585 n = &(*p)->rb_left; 586 586 else 587 587 break; ··· 611 611 return; 612 612 613 613 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; 614 - __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector, 615 - &parent, &p); 614 + __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, 615 + blk_rq_pos(cfqq->next_rq), &parent, &p); 616 616 if (!__cfqq) { 617 617 rb_link_node(&cfqq->p_node, parent, p); 618 618 rb_insert_color(&cfqq->p_node, cfqq->p_root); ··· 996 996 if (cfq_rq_close(cfqd, __cfqq->next_rq)) 997 997 return __cfqq; 998 998 999 - if (__cfqq->next_rq->sector < sector) 999 + if (blk_rq_pos(__cfqq->next_rq) < sector) 1000 1000 node = rb_next(&__cfqq->p_node); 1001 1001 else 1002 1002 node = rb_prev(&__cfqq->p_node);
+3 -3
include/linux/bio.h
··· 218 218 #define bio_sectors(bio) ((bio)->bi_size >> 9) 219 219 #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) 220 220 221 - static inline unsigned int bio_cur_sectors(struct bio *bio) 221 + static inline unsigned int bio_cur_bytes(struct bio *bio) 222 222 { 223 223 if (bio->bi_vcnt) 224 - return bio_iovec(bio)->bv_len >> 9; 224 + return bio_iovec(bio)->bv_len; 225 225 else /* dataless requests such as discard */ 226 - return bio->bi_size >> 9; 226 + return bio->bi_size; 227 227 } 228 228 229 229 static inline void *bio_data(struct bio *bio)
+16 -21
include/linux/blkdev.h
··· 166 166 enum rq_cmd_type_bits cmd_type; 167 167 unsigned long atomic_flags; 168 168 169 - /* Maintain bio traversal state for part by part I/O submission. 170 - * hard_* are block layer internals, no driver should touch them! 171 - */ 172 - 173 - sector_t sector; /* next sector to submit */ 174 - sector_t hard_sector; /* next sector to complete */ 175 - unsigned long nr_sectors; /* no. of sectors left to submit */ 176 - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ 177 - /* no. of sectors left to submit in the current segment */ 178 - unsigned int current_nr_sectors; 179 - 180 - /* no. of sectors left to complete in the current segment */ 181 - unsigned int hard_cur_sectors; 169 + sector_t sector; /* sector cursor */ 170 + unsigned int data_len; /* total data len, don't access directly */ 182 171 183 172 struct bio *bio; 184 173 struct bio *biotail; ··· 215 226 unsigned char __cmd[BLK_MAX_CDB]; 216 227 unsigned char *cmd; 217 228 218 - unsigned int data_len; 219 229 unsigned int extra_len; /* length of alignment and padding */ 220 230 unsigned int sense_len; 221 231 unsigned int resid_len; /* residual count */ ··· 828 840 */ 829 841 static inline sector_t blk_rq_pos(const struct request *rq) 830 842 { 831 - return rq->hard_sector; 843 + return rq->sector; 832 844 } 833 845 834 - extern unsigned int blk_rq_bytes(struct request *rq); 835 - extern unsigned int blk_rq_cur_bytes(struct request *rq); 846 + static inline unsigned int blk_rq_bytes(const struct request *rq) 847 + { 848 + return rq->data_len; 849 + } 850 + 851 + static inline int blk_rq_cur_bytes(const struct request *rq) 852 + { 853 + return rq->bio ? 
bio_cur_bytes(rq->bio) : 0; 854 + } 836 855 837 856 static inline unsigned int blk_rq_sectors(const struct request *rq) 838 857 { 839 - return rq->hard_nr_sectors; 858 + return blk_rq_bytes(rq) >> 9; 840 859 } 841 860 842 861 static inline unsigned int blk_rq_cur_sectors(const struct request *rq) 843 862 { 844 - return rq->hard_cur_sectors; 863 + return blk_rq_cur_bytes(rq) >> 9; 845 864 } 846 865 847 866 /* ··· 923 928 */ 924 929 static inline bool blk_end_request_cur(struct request *rq, int error) 925 930 { 926 - return blk_end_request(rq, error, rq->hard_cur_sectors << 9); 931 + return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 927 932 } 928 933 929 934 /** ··· 976 981 */ 977 982 static inline bool __blk_end_request_cur(struct request *rq, int error) 978 983 { 979 - return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); 984 + return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 980 985 } 981 986 982 987 extern void blk_complete_request(struct request *);
+1 -1
include/linux/elevator.h
··· 171 171 ELV_MQUEUE_MUST, 172 172 }; 173 173 174 - #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) 174 + #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) 175 175 #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) 176 176 177 177 /*
+8 -8
kernel/trace/blktrace.c
··· 642 642 643 643 if (blk_pc_request(rq)) { 644 644 what |= BLK_TC_ACT(BLK_TC_PC); 645 - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, 646 - rq->cmd_len, rq->cmd); 645 + __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, 646 + what, rq->errors, rq->cmd_len, rq->cmd); 647 647 } else { 648 648 what |= BLK_TC_ACT(BLK_TC_FS); 649 - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, 650 - rw, what, rq->errors, 0, NULL); 649 + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, 650 + what, rq->errors, 0, NULL); 651 651 } 652 652 } 653 653 ··· 854 854 return; 855 855 856 856 if (blk_pc_request(rq)) 857 - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, 858 - rq->errors, len, data); 857 + __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 858 + BLK_TA_DRV_DATA, rq->errors, len, data); 859 859 else 860 - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, 861 - 0, BLK_TA_DRV_DATA, rq->errors, len, data); 860 + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, 861 + BLK_TA_DRV_DATA, rq->errors, len, data); 862 862 } 863 863 EXPORT_SYMBOL_GPL(blk_add_driver_data); 864 864