Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
block/blk-barrier.c at v2.6.34-rc7 (446 lines, 11 kB)
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
{
        if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
                                             QUEUE_ORDERED_DO_POSTFLUSH))) {
                printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }

        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
            ordered != QUEUE_ORDERED_DRAIN_FUA &&
            ordered != QUEUE_ORDERED_TAG &&
            ordered != QUEUE_ORDERED_TAG_FLUSH &&
            ordered != QUEUE_ORDERED_TAG_FUA) {
                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
                return -EINVAL;
        }

        q->ordered = ordered;
        q->next_ordered = ordered;
        q->prepare_flush_fn = prepare_flush_fn;

        return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);

/*
 * Cache flushing for ordered writes handling
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}

unsigned blk_ordered_req_seq(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(q->ordseq == 0);

        if (rq == &q->pre_flush_rq)
                return QUEUE_ORDSEQ_PREFLUSH;
        if (rq == &q->bar_rq)
                return QUEUE_ORDSEQ_BAR;
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;

        /*
         * !fs requests don't need to follow barrier ordering.  Always
         * put them at the front.  This fixes the following deadlock.
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
        if (!blk_fs_request(rq))
                return QUEUE_ORDSEQ_DRAIN;

        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
            (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
        struct request *rq;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
                return false;

        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;
        __blk_end_request_all(rq, q->orderr);
        return true;
}

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
        struct request *rq;
        rq_end_io_fn *end_io;

        if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
                rq = &q->post_flush_rq;
                end_io = post_flush_end_io;
        }

        blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        unsigned skip = 0;

        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * For an empty barrier, there's no actual BAR request, which
         * in turn makes POSTFLUSH unnecessary.  Mask them off.
         */
        if (!blk_rq_sectors(rq)) {
                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
                                QUEUE_ORDERED_DO_POSTFLUSH);
                /*
                 * Empty barrier on a write-through device w/ ordered
                 * tag has no command to issue and without any command
                 * to issue, ordering by tag can't be used.  Drain
                 * instead.
                 */
                if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
                    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
                        q->ordered &= ~QUEUE_ORDERED_BY_TAG;
                        q->ordered |= QUEUE_ORDERED_BY_DRAIN;
                }
        }

        /* stash away the original request */
        blk_dequeue_request(rq);
        q->orig_bar_rq = rq;
        rq = NULL;

        /*
         * Queue ordered sequence.  As we stack them at the head, we
         * need to queue in reverse order.  Note that we rely on that
         * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
         * request gets inbetween ordered sequence.
         */
        if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
                rq = &q->post_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_POSTFLUSH;

        if (q->ordered & QUEUE_ORDERED_DO_BAR) {
                rq = &q->bar_rq;

                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
                        rq->cmd_flags |= REQ_RW;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                init_request_from_bio(rq, q->orig_bar_rq->bio);
                rq->end_io = bar_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
        } else
                skip |= QUEUE_ORDSEQ_BAR;

        if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_PREFLUSH;

        if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
                rq = NULL;
        else
                skip |= QUEUE_ORDSEQ_DRAIN;

        *rqp = rq;

        /*
         * Complete skipped sequences.  If whole sequence is complete,
         * return false to tell elevator that this request is gone.
         */
        return !blk_ordered_complete_seq(q, skip, 0);
}

bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

        if (!q->ordseq) {
                if (!is_barrier)
                        return true;

                if (q->next_ordered != QUEUE_ORDERED_NONE)
                        return start_ordered(q, rqp);
                else {
                        /*
                         * Queue ordering not supported.  Terminate
                         * with prejudice.
                         */
                        blk_dequeue_request(rq);
                        __blk_end_request_all(rq, -EOPNOTSUPP);
                        *rqp = NULL;
                        return false;
                }
        }

        /*
         * Ordered sequence in progress
         */

        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return true;

        if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                /* Ordered by tag.  Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
        } else {
                /* Ordered by draining.  Wait for turn. */
                WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
                if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
                        *rqp = NULL;
        }

        return true;
}

static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:         blockdev to issue flush for
 * @error_sector: error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        bio = bio_alloc(GFP_KERNEL, 0);
        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_private = &wait;
        bio->bi_bdev = bdev;
        submit_bio(WRITE_BARRIER, bio);

        wait_for_completion(&wait);

        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it. For non-stacked drivers, this should be copied
         * from blk_rq_pos(rq).
         */
        if (error_sector)
                *error_sector = bio->bi_sector;

        ret = 0;
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);

static void blkdev_discard_end_io(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        if (bio->bi_private)
                complete(bio->bi_private);
        __free_page(bio_page(bio));

        bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:     blockdev to issue discard for
 * @sector:   start sector
 * @nr_sects: number of sectors to discard
 * @gfp_mask: memory allocation flags (for bio_alloc)
 * @flags:    DISCARD_FL_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                         sector_t nr_sects, gfp_t gfp_mask, int flags)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q = bdev_get_queue(bdev);
        int type = flags & DISCARD_FL_BARRIER ?
                DISCARD_BARRIER : DISCARD_NOBARRIER;
        struct bio *bio;
        struct page *page;
        int ret = 0;

        if (!q)
                return -ENXIO;

        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;

        while (nr_sects && !ret) {
                unsigned int sector_size = q->limits.logical_block_size;
                unsigned int max_discard_sectors =
                        min(q->limits.max_discard_sectors, UINT_MAX >> 9);

                bio = bio_alloc(gfp_mask, 1);
                if (!bio)
                        goto out;
                bio->bi_sector = sector;
                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;
                if (flags & DISCARD_FL_WAIT)
                        bio->bi_private = &wait;

                /*
                 * Add a zeroed one-sector payload as that's what
                 * our current implementations need.  If we'll ever need
                 * more the interface will need revisiting.
                 */
                page = alloc_page(gfp_mask | __GFP_ZERO);
                if (!page)
                        goto out_free_bio;
                if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
                        goto out_free_page;

                /*
                 * And override the bio size - the way discard works we
                 * touch many more blocks on disk than the actual payload
                 * length.
                 */
                if (nr_sects > max_discard_sectors) {
                        bio->bi_size = max_discard_sectors << 9;
                        nr_sects -= max_discard_sectors;
                        sector += max_discard_sectors;
                } else {
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
                }

                bio_get(bio);
                submit_bio(type, bio);

                if (flags & DISCARD_FL_WAIT)
                        wait_for_completion(&wait);

                if (bio_flagged(bio, BIO_EOPNOTSUPP))
                        ret = -EOPNOTSUPP;
                else if (!bio_flagged(bio, BIO_UPTODATE))
                        ret = -EIO;
                bio_put(bio);
        }
        return ret;
out_free_page:
        __free_page(page);
out_free_bio:
        bio_put(bio);
out:
        return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);
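
A block driver opts into this machinery by registering an ordering mode with blk_queue_ordered() from its setup path; when the chosen mode includes a pre- or post-flush, queue_flush() above hands the internal flush requests to the driver's prepare_flush_fn. The following is a minimal sketch of such a registration, not taken from this file: the mydrv_* names are hypothetical, and the REQ_TYPE_LINUX_BLOCK/REQ_LB_OP_FLUSH convention is assumed from how drivers of this kernel era (e.g. virtio_blk) expressed a cache-flush request.

/*
 * Hypothetical driver-side sketch (illustrative only, not part of
 * blk-barrier.c): register drain-based ordering for a queue that was
 * already set up by the driver.
 */
#include <linux/blkdev.h>

static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
        /*
         * Called on the pre/post flush proxy requests queued by
         * queue_flush().  Mark the request as a cache flush the way
         * drivers of this era did; a real driver may instead build a
         * protocol-specific command here.
         */
        rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
        rq->cmd[0] = REQ_LB_OP_FLUSH;
}

static int mydrv_setup_ordered(struct request_queue *q, bool write_cache)
{
        /*
         * With a volatile write cache, barriers need the queue drained
         * and the cache flushed around the barrier request; on a
         * write-through device, draining alone is enough and no
         * prepare_flush_fn is required.
         */
        if (write_cache)
                return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                         mydrv_prepare_flush);

        return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
}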
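
On the caller side, blkdev_issue_flush() and blkdev_issue_discard() are the entry points a filesystem or ioctl path would use against this version of the API. A hedged caller-side sketch follows; the mydemo_* helper, the chosen range, and the error policy are assumptions for illustration.

/*
 * Hypothetical caller-side sketch (illustrative only): flush the
 * device's write cache, then synchronously discard a freed range.
 */
#include <linux/blkdev.h>
#include <linux/gfp.h>

static int mydemo_flush_and_discard(struct block_device *bdev,
                                    sector_t start, sector_t nr_sects)
{
        sector_t error_sector;
        int ret;

        /* Empty-barrier flush; -EOPNOTSUPP means barriers are unsupported. */
        ret = blkdev_issue_flush(bdev, &error_sector);
        if (ret && ret != -EOPNOTSUPP)
                return ret;

        /* Wait for the discard to complete before reusing the range. */
        ret = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL,
                                   DISCARD_FL_WAIT);
        if (ret == -EOPNOTSUPP)
                ret = 0;        /* no discard support; treat as non-fatal */

        return ret;
}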