Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at dc39455e7948ec9bc5f3f2dced5c2f5ac8a8dfd9 (323 lines, 7.9 kB)
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q: the request queue
 * @ordered: one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
		      prepare_flush_fn *prepare_flush_fn)
{
	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
	    prepare_flush_fn == NULL) {
		printk(KERN_ERR "%s: prepare_flush_fn required\n",
		       __FUNCTION__);
		return -EINVAL;
	}

	if (ordered != QUEUE_ORDERED_NONE &&
	    ordered != QUEUE_ORDERED_DRAIN &&
	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
	    ordered != QUEUE_ORDERED_TAG &&
	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
	    ordered != QUEUE_ORDERED_TAG_FUA) {
		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
		return -EINVAL;
	}

	q->ordered = ordered;
	q->next_ordered = ordered;
	q->prepare_flush_fn = prepare_flush_fn;

	return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);

/*
 * Cache flushing for ordered writes handling
 */
inline unsigned blk_ordered_cur_seq(struct request_queue *q)
{
	if (!q->ordseq)
		return 0;
	return 1 << ffz(q->ordseq);
}

unsigned blk_ordered_req_seq(struct request *rq)
{
	struct request_queue *q = rq->q;

	BUG_ON(q->ordseq == 0);

	if (rq == &q->pre_flush_rq)
		return QUEUE_ORDSEQ_PREFLUSH;
	if (rq == &q->bar_rq)
		return QUEUE_ORDSEQ_BAR;
	if (rq == &q->post_flush_rq)
		return QUEUE_ORDSEQ_POSTFLUSH;

	/*
	 * !fs requests don't need to follow barrier ordering.  Always
	 * put them at the front.  This fixes the following deadlock.
	 *
	 * http://thread.gmane.org/gmane.linux.kernel/537473
	 */
	if (!blk_fs_request(rq))
		return QUEUE_ORDSEQ_DRAIN;

	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
		return QUEUE_ORDSEQ_DRAIN;
	else
		return QUEUE_ORDSEQ_DONE;
}

void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
	struct request *rq;

	if (error && !q->orderr)
		q->orderr = error;

	BUG_ON(q->ordseq & seq);
	q->ordseq |= seq;

	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
		return;

	/*
	 * Okay, sequence complete.
	 */
	q->ordseq = 0;
	rq = q->orig_bar_rq;

	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
		BUG();
}

static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
	struct request *rq;
	rq_end_io_fn *end_io;

	if (which == QUEUE_ORDERED_PREFLUSH) {
		rq = &q->pre_flush_rq;
		end_io = pre_flush_end_io;
	} else {
		rq = &q->post_flush_rq;
		end_io = post_flush_end_io;
	}

	rq->cmd_flags = REQ_HARDBARRIER;
	rq_init(q, rq);
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	rq->rq_disk = q->bar_rq.rq_disk;
	rq->end_io = end_io;
	q->prepare_flush_fn(q, rq);

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline struct request *start_ordered(struct request_queue *q,
					    struct request *rq)
{
	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * Prep proxy barrier request.
	 */
	blkdev_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = &q->bar_rq;
	rq->cmd_flags = 0;
	rq_init(q, rq);
	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
		rq->cmd_flags |= REQ_RW;
	if (q->ordered & QUEUE_ORDERED_FUA)
		rq->cmd_flags |= REQ_FUA;
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	init_request_from_bio(rq, q->orig_bar_rq->bio);
	rq->end_io = bar_end_io;

	/*
	 * Queue ordered sequence.  As we stack them at the head, we
	 * need to queue in reverse order.  Note that we rely on that
	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
	 * request gets inbetween ordered sequence. If this request is
	 * an empty barrier, we don't need to do a postflush ever since
	 * there will be no data written between the pre and post flush.
	 * Hence a single flush will suffice.
	 */
	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
	else
		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);

	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;

	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
	else
		rq = NULL;

	return rq;
}

int blk_do_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		if (!is_barrier)
			return 1;

		if (q->next_ordered != QUEUE_ORDERED_NONE) {
			*rqp = start_ordered(q, rq);
			return 1;
		} else {
			/*
			 * This can happen when the queue switches to
			 * ORDERED_NONE while this request is on it.
			 */
			blkdev_dequeue_request(rq);
			if (__blk_end_request(rq, -EOPNOTSUPP,
					      blk_rq_bytes(rq)))
				BUG();
			*rqp = NULL;
			return 0;
		}
	}

	/*
	 * Ordered sequence in progress
	 */

	/* Special requests are not subject to ordering rules. */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return 1;

	if (q->ordered & QUEUE_ORDERED_TAG) {
		/* Ordered by tag.  Blocking the next barrier is enough. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Ordered by draining.  Wait for turn. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return 1;
}

static void bio_end_empty_barrier(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}

	complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev: blockdev to issue flush for
 * @error_sector: error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.  Caller must run wait_for_completion() on its own.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	bio = bio_alloc(GFP_KERNEL, 0);
	if (!bio)
		return -ENOMEM;

	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
	submit_bio(1 << BIO_RW_BARRIER, bio);

	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be copied
	 * from rq->sector.
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	ret = 0;
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	else if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
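
The two exported entry points above are meant to be called from outside this file: a low-level block driver of this era opts into barrier handling by registering an ordering mode and a flush-preparation callback via blk_queue_ordered(), and any caller can force a write-cache flush on a device with blkdev_issue_flush(). The sketch below illustrates that usage against the functions in this file only; it is a minimal, hedged example, and everything named mydrv_* (driver name, queue-setup hook, callback body) is a hypothetical placeholder rather than code from the kernel tree.

/*
 * Hypothetical usage sketch (not part of this file).  Only
 * blk_queue_ordered(), the QUEUE_ORDERED_* constants and
 * blkdev_issue_flush() are taken from the code above; the mydrv_*
 * names are assumed for illustration.
 */
#include <linux/blkdev.h>

/* Called on the proxy flush request queued by queue_flush() above;
 * a real driver would turn it into its device-specific cache-flush
 * command here. */
static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
	/* device-specific flush-command setup (placeholder) */
}

static int mydrv_init_queue(struct request_queue *q)
{
	/*
	 * Drain the queue around barrier requests and issue cache
	 * flushes around the barrier itself.  Because this mode sets
	 * flush bits, prepare_flush_fn is mandatory, as enforced by
	 * blk_queue_ordered() above.
	 */
	return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
				 mydrv_prepare_flush);
}

/* A caller flushing a whole block device's write cache. */
static int mydrv_flush_dev(struct block_device *bdev)
{
	sector_t error_sector;

	/* Returns -EOPNOTSUPP if the device rejects barrier bios. */
	return blkdev_issue_flush(bdev, &error_sector);
}

The ordering mode passed to blk_queue_ordered() is the main design choice: a tag-ordered mode (QUEUE_ORDERED_TAG*) lets the device enforce ordering itself, while the drain modes make blk_do_ordered() hold back requests until the in-flight ones complete, as implemented above.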