Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.29-rc2 419 lines 10 kB view raw
1/* 2 * Functions related to barrier IO handling 3 */ 4#include <linux/kernel.h> 5#include <linux/module.h> 6#include <linux/bio.h> 7#include <linux/blkdev.h> 8 9#include "blk.h" 10 11/** 12 * blk_queue_ordered - does this queue support ordered writes 13 * @q: the request queue 14 * @ordered: one of QUEUE_ORDERED_* 15 * @prepare_flush_fn: rq setup helper for cache flush ordered writes 16 * 17 * Description: 18 * For journalled file systems, doing ordered writes on a commit 19 * block instead of explicitly doing wait_on_buffer (which is bad 20 * for performance) can be a big win. Block drivers supporting this 21 * feature should call this function and indicate so. 22 * 23 **/ 24int blk_queue_ordered(struct request_queue *q, unsigned ordered, 25 prepare_flush_fn *prepare_flush_fn) 26{ 27 if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH | 28 QUEUE_ORDERED_DO_POSTFLUSH))) { 29 printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__); 30 return -EINVAL; 31 } 32 33 if (ordered != QUEUE_ORDERED_NONE && 34 ordered != QUEUE_ORDERED_DRAIN && 35 ordered != QUEUE_ORDERED_DRAIN_FLUSH && 36 ordered != QUEUE_ORDERED_DRAIN_FUA && 37 ordered != QUEUE_ORDERED_TAG && 38 ordered != QUEUE_ORDERED_TAG_FLUSH && 39 ordered != QUEUE_ORDERED_TAG_FUA) { 40 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); 41 return -EINVAL; 42 } 43 44 q->ordered = ordered; 45 q->next_ordered = ordered; 46 q->prepare_flush_fn = prepare_flush_fn; 47 48 return 0; 49} 50EXPORT_SYMBOL(blk_queue_ordered); 51 52/* 53 * Cache flushing for ordered writes handling 54 */ 55unsigned blk_ordered_cur_seq(struct request_queue *q) 56{ 57 if (!q->ordseq) 58 return 0; 59 return 1 << ffz(q->ordseq); 60} 61 62unsigned blk_ordered_req_seq(struct request *rq) 63{ 64 struct request_queue *q = rq->q; 65 66 BUG_ON(q->ordseq == 0); 67 68 if (rq == &q->pre_flush_rq) 69 return QUEUE_ORDSEQ_PREFLUSH; 70 if (rq == &q->bar_rq) 71 return QUEUE_ORDSEQ_BAR; 72 if (rq == &q->post_flush_rq) 73 return QUEUE_ORDSEQ_POSTFLUSH; 74 75 /* 76 * !fs requests don't need to follow barrier ordering. Always 77 * put them at the front. This fixes the following deadlock. 78 * 79 * http://thread.gmane.org/gmane.linux.kernel/537473 80 */ 81 if (!blk_fs_request(rq)) 82 return QUEUE_ORDSEQ_DRAIN; 83 84 if ((rq->cmd_flags & REQ_ORDERED_COLOR) == 85 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) 86 return QUEUE_ORDSEQ_DRAIN; 87 else 88 return QUEUE_ORDSEQ_DONE; 89} 90 91bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) 92{ 93 struct request *rq; 94 95 if (error && !q->orderr) 96 q->orderr = error; 97 98 BUG_ON(q->ordseq & seq); 99 q->ordseq |= seq; 100 101 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) 102 return false; 103 104 /* 105 * Okay, sequence complete. 106 */ 107 q->ordseq = 0; 108 rq = q->orig_bar_rq; 109 110 if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) 111 BUG(); 112 113 return true; 114} 115 116static void pre_flush_end_io(struct request *rq, int error) 117{ 118 elv_completed_request(rq->q, rq); 119 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); 120} 121 122static void bar_end_io(struct request *rq, int error) 123{ 124 elv_completed_request(rq->q, rq); 125 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); 126} 127 128static void post_flush_end_io(struct request *rq, int error) 129{ 130 elv_completed_request(rq->q, rq); 131 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); 132} 133 134static void queue_flush(struct request_queue *q, unsigned which) 135{ 136 struct request *rq; 137 rq_end_io_fn *end_io; 138 139 if (which == QUEUE_ORDERED_DO_PREFLUSH) { 140 rq = &q->pre_flush_rq; 141 end_io = pre_flush_end_io; 142 } else { 143 rq = &q->post_flush_rq; 144 end_io = post_flush_end_io; 145 } 146 147 blk_rq_init(q, rq); 148 rq->cmd_flags = REQ_HARDBARRIER; 149 rq->rq_disk = q->bar_rq.rq_disk; 150 rq->end_io = end_io; 151 q->prepare_flush_fn(q, rq); 152 153 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 154} 155 156static inline bool start_ordered(struct request_queue *q, struct request **rqp) 157{ 158 struct request *rq = *rqp; 159 unsigned skip = 0; 160 161 q->orderr = 0; 162 q->ordered = q->next_ordered; 163 q->ordseq |= QUEUE_ORDSEQ_STARTED; 164 165 /* 166 * For an empty barrier, there's no actual BAR request, which 167 * in turn makes POSTFLUSH unnecessary. Mask them off. 168 */ 169 if (!rq->hard_nr_sectors) { 170 q->ordered &= ~(QUEUE_ORDERED_DO_BAR | 171 QUEUE_ORDERED_DO_POSTFLUSH); 172 /* 173 * Empty barrier on a write-through device w/ ordered 174 * tag has no command to issue and without any command 175 * to issue, ordering by tag can't be used. Drain 176 * instead. 177 */ 178 if ((q->ordered & QUEUE_ORDERED_BY_TAG) && 179 !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { 180 q->ordered &= ~QUEUE_ORDERED_BY_TAG; 181 q->ordered |= QUEUE_ORDERED_BY_DRAIN; 182 } 183 } 184 185 /* stash away the original request */ 186 elv_dequeue_request(q, rq); 187 q->orig_bar_rq = rq; 188 rq = NULL; 189 190 /* 191 * Queue ordered sequence. As we stack them at the head, we 192 * need to queue in reverse order. Note that we rely on that 193 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs 194 * request gets inbetween ordered sequence. 195 */ 196 if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { 197 queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); 198 rq = &q->post_flush_rq; 199 } else 200 skip |= QUEUE_ORDSEQ_POSTFLUSH; 201 202 if (q->ordered & QUEUE_ORDERED_DO_BAR) { 203 rq = &q->bar_rq; 204 205 /* initialize proxy request and queue it */ 206 blk_rq_init(q, rq); 207 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) 208 rq->cmd_flags |= REQ_RW; 209 if (q->ordered & QUEUE_ORDERED_DO_FUA) 210 rq->cmd_flags |= REQ_FUA; 211 init_request_from_bio(rq, q->orig_bar_rq->bio); 212 rq->end_io = bar_end_io; 213 214 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 215 } else 216 skip |= QUEUE_ORDSEQ_BAR; 217 218 if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { 219 queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); 220 rq = &q->pre_flush_rq; 221 } else 222 skip |= QUEUE_ORDSEQ_PREFLUSH; 223 224 if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight) 225 rq = NULL; 226 else 227 skip |= QUEUE_ORDSEQ_DRAIN; 228 229 *rqp = rq; 230 231 /* 232 * Complete skipped sequences. If whole sequence is complete, 233 * return false to tell elevator that this request is gone. 234 */ 235 return !blk_ordered_complete_seq(q, skip, 0); 236} 237 238bool blk_do_ordered(struct request_queue *q, struct request **rqp) 239{ 240 struct request *rq = *rqp; 241 const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); 242 243 if (!q->ordseq) { 244 if (!is_barrier) 245 return true; 246 247 if (q->next_ordered != QUEUE_ORDERED_NONE) 248 return start_ordered(q, rqp); 249 else { 250 /* 251 * Queue ordering not supported. Terminate 252 * with prejudice. 253 */ 254 elv_dequeue_request(q, rq); 255 if (__blk_end_request(rq, -EOPNOTSUPP, 256 blk_rq_bytes(rq))) 257 BUG(); 258 *rqp = NULL; 259 return false; 260 } 261 } 262 263 /* 264 * Ordered sequence in progress 265 */ 266 267 /* Special requests are not subject to ordering rules. */ 268 if (!blk_fs_request(rq) && 269 rq != &q->pre_flush_rq && rq != &q->post_flush_rq) 270 return true; 271 272 if (q->ordered & QUEUE_ORDERED_BY_TAG) { 273 /* Ordered by tag. Blocking the next barrier is enough. */ 274 if (is_barrier && rq != &q->bar_rq) 275 *rqp = NULL; 276 } else { 277 /* Ordered by draining. Wait for turn. */ 278 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); 279 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) 280 *rqp = NULL; 281 } 282 283 return true; 284} 285 286static void bio_end_empty_barrier(struct bio *bio, int err) 287{ 288 if (err) { 289 if (err == -EOPNOTSUPP) 290 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 291 clear_bit(BIO_UPTODATE, &bio->bi_flags); 292 } 293 294 complete(bio->bi_private); 295} 296 297/** 298 * blkdev_issue_flush - queue a flush 299 * @bdev: blockdev to issue flush for 300 * @error_sector: error sector 301 * 302 * Description: 303 * Issue a flush for the block device in question. Caller can supply 304 * room for storing the error offset in case of a flush error, if they 305 * wish to. Caller must run wait_for_completion() on its own. 306 */ 307int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 308{ 309 DECLARE_COMPLETION_ONSTACK(wait); 310 struct request_queue *q; 311 struct bio *bio; 312 int ret; 313 314 if (bdev->bd_disk == NULL) 315 return -ENXIO; 316 317 q = bdev_get_queue(bdev); 318 if (!q) 319 return -ENXIO; 320 321 bio = bio_alloc(GFP_KERNEL, 0); 322 if (!bio) 323 return -ENOMEM; 324 325 bio->bi_end_io = bio_end_empty_barrier; 326 bio->bi_private = &wait; 327 bio->bi_bdev = bdev; 328 submit_bio(WRITE_BARRIER, bio); 329 330 wait_for_completion(&wait); 331 332 /* 333 * The driver must store the error location in ->bi_sector, if 334 * it supports it. For non-stacked drivers, this should be copied 335 * from rq->sector. 336 */ 337 if (error_sector) 338 *error_sector = bio->bi_sector; 339 340 ret = 0; 341 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 342 ret = -EOPNOTSUPP; 343 else if (!bio_flagged(bio, BIO_UPTODATE)) 344 ret = -EIO; 345 346 bio_put(bio); 347 return ret; 348} 349EXPORT_SYMBOL(blkdev_issue_flush); 350 351static void blkdev_discard_end_io(struct bio *bio, int err) 352{ 353 if (err) { 354 if (err == -EOPNOTSUPP) 355 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 356 clear_bit(BIO_UPTODATE, &bio->bi_flags); 357 } 358 359 bio_put(bio); 360} 361 362/** 363 * blkdev_issue_discard - queue a discard 364 * @bdev: blockdev to issue discard for 365 * @sector: start sector 366 * @nr_sects: number of sectors to discard 367 * @gfp_mask: memory allocation flags (for bio_alloc) 368 * 369 * Description: 370 * Issue a discard request for the sectors in question. Does not wait. 371 */ 372int blkdev_issue_discard(struct block_device *bdev, 373 sector_t sector, sector_t nr_sects, gfp_t gfp_mask) 374{ 375 struct request_queue *q; 376 struct bio *bio; 377 int ret = 0; 378 379 if (bdev->bd_disk == NULL) 380 return -ENXIO; 381 382 q = bdev_get_queue(bdev); 383 if (!q) 384 return -ENXIO; 385 386 if (!q->prepare_discard_fn) 387 return -EOPNOTSUPP; 388 389 while (nr_sects && !ret) { 390 bio = bio_alloc(gfp_mask, 0); 391 if (!bio) 392 return -ENOMEM; 393 394 bio->bi_end_io = blkdev_discard_end_io; 395 bio->bi_bdev = bdev; 396 397 bio->bi_sector = sector; 398 399 if (nr_sects > q->max_hw_sectors) { 400 bio->bi_size = q->max_hw_sectors << 9; 401 nr_sects -= q->max_hw_sectors; 402 sector += q->max_hw_sectors; 403 } else { 404 bio->bi_size = nr_sects << 9; 405 nr_sects = 0; 406 } 407 bio_get(bio); 408 submit_bio(DISCARD_BARRIER, bio); 409 410 /* Check if it failed immediately */ 411 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 412 ret = -EOPNOTSUPP; 413 else if (!bio_flagged(bio, BIO_UPTODATE)) 414 ret = -EIO; 415 bio_put(bio); 416 } 417 return ret; 418} 419EXPORT_SYMBOL(blkdev_issue_discard);