Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

block/blk-barrier.c at cba767175becadc5c4016cceb7bfdd2c7fe722f4 (387 lines, 9.3 kB)
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q: the request queue
 * @ordered: one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
{
        if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
            prepare_flush_fn == NULL) {
                printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }

        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
            ordered != QUEUE_ORDERED_DRAIN_FUA &&
            ordered != QUEUE_ORDERED_TAG &&
            ordered != QUEUE_ORDERED_TAG_FLUSH &&
            ordered != QUEUE_ORDERED_TAG_FUA) {
                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
                return -EINVAL;
        }

        q->ordered = ordered;
        q->next_ordered = ordered;
        q->prepare_flush_fn = prepare_flush_fn;

        return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
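
/*
 * Illustrative sketch (not part of the original file): how a driver
 * for a disk with a volatile write-back cache might register ordered
 * write support. The "mydrv_" names and the SYNCHRONIZE CACHE setup
 * are hypothetical; a real driver (sd, ide-disk, ...) fills in its
 * own cache-flush command. QUEUE_ORDERED_DRAIN_FLUSH asks the block
 * layer to drain the queue and issue a pre- and post-flush around the
 * barrier request, so prepare_flush_fn is mandatory for it.
 */
static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
        memset(rq->cmd, 0, sizeof(rq->cmd));
        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->cmd[0] = 0x35;      /* SCSI SYNCHRONIZE CACHE(10) */
        rq->cmd_len = 10;
}

static int mydrv_init_queue(struct request_queue *q)
{
        return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                 mydrv_prepare_flush);
}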

/*
 * Cache flushing for ordered writes handling
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}

unsigned blk_ordered_req_seq(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(q->ordseq == 0);

        if (rq == &q->pre_flush_rq)
                return QUEUE_ORDSEQ_PREFLUSH;
        if (rq == &q->bar_rq)
                return QUEUE_ORDSEQ_BAR;
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;

        /*
         * !fs requests don't need to follow barrier ordering. Always
         * put them at the front. This fixes the following deadlock:
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
        if (!blk_fs_request(rq))
                return QUEUE_ORDSEQ_DRAIN;

        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
            (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
}

void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
        struct request *rq;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
                return;

        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;

        if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
                BUG();
}

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
        struct request *rq;
        rq_end_io_fn *end_io;

        if (which == QUEUE_ORDERED_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
                rq = &q->post_flush_rq;
                end_io = post_flush_end_io;
        }

        blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline struct request *start_ordered(struct request_queue *q,
                                            struct request *rq)
{
        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * Prep proxy barrier request.
         */
        elv_dequeue_request(q, rq);
        q->orig_bar_rq = rq;
        rq = &q->bar_rq;
        blk_rq_init(q, rq);
        if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
                rq->cmd_flags |= REQ_RW;
        if (q->ordered & QUEUE_ORDERED_FUA)
                rq->cmd_flags |= REQ_FUA;
        init_request_from_bio(rq, q->orig_bar_rq->bio);
        rq->end_io = bar_end_io;

        /*
         * Queue ordered sequence. As we stack them at the head, we
         * need to queue in reverse order. Note that we rely on the
         * fact that no fs request uses ELEVATOR_INSERT_FRONT and thus
         * no fs request gets in between the ordered sequence. If this
         * request is an empty barrier, we don't need to do a
         * postflush, since there will be no data written between the
         * pre and post flush. Hence a single flush will suffice.
         */
        if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
                queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
        else
                q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);

        if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
                queue_flush(q, QUEUE_ORDERED_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
                q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;

        if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
                q->ordseq |= QUEUE_ORDSEQ_DRAIN;
        else
                rq = NULL;

        return rq;
}

int blk_do_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

        if (!q->ordseq) {
                if (!is_barrier)
                        return 1;

                if (q->next_ordered != QUEUE_ORDERED_NONE) {
                        *rqp = start_ordered(q, rq);
                        return 1;
                } else {
                        /*
                         * This can happen when the queue switches to
                         * ORDERED_NONE while this request is on it.
                         */
                        elv_dequeue_request(q, rq);
                        if (__blk_end_request(rq, -EOPNOTSUPP,
                                              blk_rq_bytes(rq)))
                                BUG();
                        *rqp = NULL;
                        return 0;
                }
        }

        /*
         * Ordered sequence in progress
         */

        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return 1;

        if (q->ordered & QUEUE_ORDERED_TAG) {
                /* Ordered by tag. Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
        } else {
                /* Ordered by draining. Wait for turn. */
                WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
                if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
                        *rqp = NULL;
        }

        return 1;
}
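
/*
 * Worked example (added for illustration): with the QUEUE_ORDSEQ_*
 * bits defined in blkdev.h of this era as STARTED 0x01, DRAIN 0x02,
 * PREFLUSH 0x04, BAR 0x08, POSTFLUSH 0x10 and DONE 0x20, suppose the
 * start, drain and pre-flush stages have all completed: q->ordseq is
 * then 0x07, ffz() finds bit 3 as the first zero bit, and
 * blk_ordered_cur_seq() returns 1 << 3 == QUEUE_ORDSEQ_BAR. The
 * drain branch of blk_do_ordered() therefore holds back every request
 * whose blk_ordered_req_seq() is later than BAR until bar_end_io()
 * marks that stage complete.
 */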

static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev: blockdev to issue flush for
 * @error_sector: error sector
 *
 * Description:
 *   Issue a flush for the block device in question. Caller can supply
 *   room for storing the error offset in case of a flush error, if they
 *   wish to. The function waits for the flush to complete before
 *   returning.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        bio = bio_alloc(GFP_KERNEL, 0);
        if (!bio)
                return -ENOMEM;

        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_private = &wait;
        bio->bi_bdev = bdev;
        submit_bio(WRITE_BARRIER, bio);

        wait_for_completion(&wait);

        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it. For non-stacked drivers, this should be copied
         * from rq->sector.
         */
        if (error_sector)
                *error_sector = bio->bi_sector;

        ret = 0;
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
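
/*
 * Illustrative sketch (not part of the original file): a filesystem
 * forcing its journal commit to stable storage. The "myfs_" helper is
 * hypothetical; passing NULL skips the error-sector report, and
 * -EOPNOTSUPP from drives without a cache flush is usually treated as
 * success rather than an error.
 */
static int myfs_flush_device(struct super_block *sb)
{
        int err = blkdev_issue_flush(sb->s_bdev, NULL);

        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}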

static void blkdev_discard_end_io(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev: blockdev to issue discard for
 * @sector: start sector
 * @nr_sects: number of sectors to discard
 * @gfp_mask: memory allocation flags (for bio_alloc)
 *
 * Description:
 *   Issue a discard request for the sectors in question. Does not wait.
 */
int blkdev_issue_discard(struct block_device *bdev,
                         sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!q->prepare_discard_fn)
                return -EOPNOTSUPP;

        while (nr_sects && !ret) {
                bio = bio_alloc(gfp_mask, 0);
                if (!bio)
                        return -ENOMEM;

                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;

                bio->bi_sector = sector;

                if (nr_sects > q->max_hw_sectors) {
                        bio->bi_size = q->max_hw_sectors << 9;
                        nr_sects -= q->max_hw_sectors;
                        sector += q->max_hw_sectors;
                } else {
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
                bio_get(bio);
                submit_bio(DISCARD_BARRIER, bio);

                /* Check if it failed immediately */
                if (bio_flagged(bio, BIO_EOPNOTSUPP))
                        ret = -EOPNOTSUPP;
                else if (!bio_flagged(bio, BIO_UPTODATE))
                        ret = -EIO;
                bio_put(bio);
        }
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
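
/*
 * Illustrative sketch (not part of the original file): a filesystem
 * discarding a freed extent. The "myfs_" helper is hypothetical;
 * start and len are in 512-byte sectors, matching the sector_t
 * arguments above. As with flushes, missing discard support is
 * usually treated as a no-op rather than a hard error.
 */
static int myfs_discard_extent(struct block_device *bdev,
                               sector_t start, sector_t len)
{
        int err = blkdev_issue_discard(bdev, start, len, GFP_KERNEL);

        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}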