···263263The generic i/o scheduler would make sure that it places the barrier request and264264all other requests coming after it after all the previous requests in the265265queue. Barriers may be implemented in different ways depending on the266266-driver. A SCSI driver for example could make use of ordered tags to267267-preserve the necessary ordering with a lower impact on throughput. For IDE268268-this might be two sync cache flush: a pre and post flush when encountering269269-a barrier write.270270-271271-There is a provision for queues to indicate what kind of barriers they272272-can provide. This is as of yet unmerged, details will be added here once it273273-is in the kernel.266266+driver. For more details regarding I/O barriers, please read barrier.txt267267+in this directory.2742682752691.2.2 Request Priority/Latency276270
+25-119
block/as-iosched.c
···182182183183static kmem_cache_t *arq_pool;184184185185+static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);186186+static void as_antic_stop(struct as_data *ad);187187+185188/*186189 * IO Context helper functions187190 */···373370 * existing request against the same sector), which can happen when using374371 * direct IO, then return the alias.375372 */376376-static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq)373373+static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)377374{378375 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;379376 struct rb_node *parent = NULL;···398395 rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));399396400397 return NULL;398398+}399399+400400+static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)401401+{402402+ struct as_rq *alias;403403+404404+ while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {405405+ as_move_to_dispatch(ad, alias);406406+ as_antic_stop(ad);407407+ }401408}402409403410static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)···11461133 /*11471134 * take it off the sort and fifo list, add to dispatch queue11481135 */11491149- while (!list_empty(&rq->queuelist)) {11501150- struct request *__rq = list_entry_rq(rq->queuelist.next);11511151- struct as_rq *__arq = RQ_DATA(__rq);11521152-11531153- list_del(&__rq->queuelist);11541154-11551155- elv_dispatch_add_tail(ad->q, __rq);11561156-11571157- if (__arq->io_context && __arq->io_context->aic)11581158- atomic_inc(&__arq->io_context->aic->nr_dispatched);11591159-11601160- WARN_ON(__arq->state != AS_RQ_QUEUED);11611161- __arq->state = AS_RQ_DISPATCHED;11621162-11631163- ad->nr_dispatched++;11641164- }11651165-11661136 as_remove_queued_request(ad->q, rq);11671137 WARN_ON(arq->state != AS_RQ_QUEUED);11681138···13221326}1323132713241328/*13251325- * Add arq to a list behind alias13261326- */13271327-static inline void13281328-as_add_aliased_request(struct as_data *ad, struct as_rq *arq,13291329- struct as_rq *alias)13301330-{13311331- struct request *req = arq->request;13321332- struct list_head *insert = alias->request->queuelist.prev;13331333-13341334- /*13351335- * Transfer list of aliases13361336- */13371337- while (!list_empty(&req->queuelist)) {13381338- struct request *__rq = list_entry_rq(req->queuelist.next);13391339- struct as_rq *__arq = RQ_DATA(__rq);13401340-13411341- list_move_tail(&__rq->queuelist, &alias->request->queuelist);13421342-13431343- WARN_ON(__arq->state != AS_RQ_QUEUED);13441344- }13451345-13461346- /*13471347- * Another request with the same start sector on the rbtree.13481348- * Link this request to that sector. They are untangled in13491349- * as_move_to_dispatch13501350- */13511351- list_add(&arq->request->queuelist, insert);13521352-13531353- /*13541354- * Don't want to have to handle merges.13551355- */13561356- as_del_arq_hash(arq);13571357- arq->request->flags |= REQ_NOMERGE;13581358-}13591359-13601360-/*13611329 * add arq to rbtree and fifo13621330 */13631331static void as_add_request(request_queue_t *q, struct request *rq)13641332{13651333 struct as_data *ad = q->elevator->elevator_data;13661334 struct as_rq *arq = RQ_DATA(rq);13671367- struct as_rq *alias;13681335 int data_dir;1369133613701337 arq->state = AS_RQ_NEW;···13461387 atomic_inc(&arq->io_context->aic->nr_queued);13471388 }1348138913491349- alias = as_add_arq_rb(ad, arq);13501350- if (!alias) {13511351- /*13521352- * set expire time (only used for reads) and add to fifo list13531353- */13541354- arq->expires = jiffies + ad->fifo_expire[data_dir];13551355- list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);13901390+ as_add_arq_rb(ad, arq);13911391+ if (rq_mergeable(arq->request))13921392+ as_add_arq_hash(ad, arq);1356139313571357- if (rq_mergeable(arq->request))13581358- as_add_arq_hash(ad, arq);13591359- as_update_arq(ad, arq); /* keep state machine up to date */13941394+ /*13951395+ * set expire time (only used for reads) and add to fifo list13961396+ */13971397+ arq->expires = jiffies + ad->fifo_expire[data_dir];13981398+ list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);1360139913611361- } else {13621362- as_add_aliased_request(ad, arq, alias);13631363-13641364- /*13651365- * have we been anticipating this request?13661366- * or does it come from the same process as the one we are13671367- * anticipating for?13681368- */13691369- if (ad->antic_status == ANTIC_WAIT_REQ13701370- || ad->antic_status == ANTIC_WAIT_NEXT) {13711371- if (as_can_break_anticipation(ad, arq))13721372- as_antic_stop(ad);13731373- }13741374- }13751375-14001400+ as_update_arq(ad, arq); /* keep state machine up to date */13761401 arq->state = AS_RQ_QUEUED;13771402}13781403···14791536 * if the merge was a front merge, we need to reposition request14801537 */14811538 if (rq_rb_key(req) != arq->rb_key) {14821482- struct as_rq *alias, *next_arq = NULL;14831483-14841484- if (ad->next_arq[arq->is_sync] == arq)14851485- next_arq = as_find_next_arq(ad, arq);14861486-14871487- /*14881488- * Note! We should really be moving any old aliased requests14891489- * off this request and try to insert them into the rbtree. We14901490- * currently don't bother. Ditto the next function.14911491- */14921539 as_del_arq_rb(ad, arq);14931493- if ((alias = as_add_arq_rb(ad, arq))) {14941494- list_del_init(&arq->fifo);14951495- as_add_aliased_request(ad, arq, alias);14961496- if (next_arq)14971497- ad->next_arq[arq->is_sync] = next_arq;14981498- }15401540+ as_add_arq_rb(ad, arq);14991541 /*15001542 * Note! At this stage of this and the next function, our next15011543 * request may not be optimal - eg the request may have "grown"···15071579 as_add_arq_hash(ad, arq);1508158015091581 if (rq_rb_key(req) != arq->rb_key) {15101510- struct as_rq *alias, *next_arq = NULL;15111511-15121512- if (ad->next_arq[arq->is_sync] == arq)15131513- next_arq = as_find_next_arq(ad, arq);15141514-15151582 as_del_arq_rb(ad, arq);15161516- if ((alias = as_add_arq_rb(ad, arq))) {15171517- list_del_init(&arq->fifo);15181518- as_add_aliased_request(ad, arq, alias);15191519- if (next_arq)15201520- ad->next_arq[arq->is_sync] = next_arq;15211521- }15831583+ as_add_arq_rb(ad, arq);15221584 }1523158515241586 /*···15251607 */15261608 swap_io_context(&arq->io_context, &anext->io_context);15271609 }15281528- }15291529-15301530- /*15311531- * Transfer list of aliases15321532- */15331533- while (!list_empty(&next->queuelist)) {15341534- struct request *__rq = list_entry_rq(next->queuelist.next);15351535- struct as_rq *__arq = RQ_DATA(__rq);15361536-15371537- list_move_tail(&__rq->queuelist, &req->queuelist);15381538-15391539- WARN_ON(__arq->state != AS_RQ_QUEUED);15401610 }1541161115421612 /*
+8-8
block/cfq-iosched.c
···2525/*2626 * tunables2727 */2828-static int cfq_quantum = 4; /* max queue in one round of service */2929-static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/3030-static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };3131-static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */3232-static int cfq_back_penalty = 2; /* penalty of a backwards seek */2828+static const int cfq_quantum = 4; /* max queue in one round of service */2929+static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/3030+static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };3131+static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */3232+static const int cfq_back_penalty = 2; /* penalty of a backwards seek */33333434-static int cfq_slice_sync = HZ / 10;3434+static const int cfq_slice_sync = HZ / 10;3535static int cfq_slice_async = HZ / 25;3636-static int cfq_slice_async_rq = 2;3636+static const int cfq_slice_async_rq = 2;3737static int cfq_slice_idle = HZ / 100;38383939#define CFQ_IDLE_GRACE (HZ / 10)···4545/*4646 * disable queueing at the driver/hardware level4747 */4848-static int cfq_max_depth = 2;4848+static const int cfq_max_depth = 2;49495050/*5151 * for the hash of cfqq inside the cfqd
+4-4
block/deadline-iosched.c
···1919/*2020 * See Documentation/block/deadline-iosched.txt2121 */2222-static int read_expire = HZ / 2; /* max time before a read is submitted. */2323-static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */2424-static int writes_starved = 2; /* max times reads can starve a write */2525-static int fifo_batch = 16; /* # of sequential requests treated as one2222+static const int read_expire = HZ / 2; /* max time before a read is submitted. */2323+static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */2424+static const int writes_starved = 2; /* max times reads can starve a write */2525+static const int fifo_batch = 16; /* # of sequential requests treated as one2626 by the above parameters. For throughput. */27272828static const int deadline_hash_shift = 5;
+59-27
block/elevator.c
···304304305305 rq->flags &= ~REQ_STARTED;306306307307- /*308308- * if this is the flush, requeue the original instead and drop the flush309309- */310310- if (rq->flags & REQ_BAR_FLUSH) {311311- clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);312312- rq = rq->end_io_data;313313- }314314-315315- __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);307307+ __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);316308}317309318310static void elv_drain_elevator(request_queue_t *q)···324332void __elv_add_request(request_queue_t *q, struct request *rq, int where,325333 int plug)326334{335335+ struct list_head *pos;336336+ unsigned ordseq;337337+338338+ if (q->ordcolor)339339+ rq->flags |= REQ_ORDERED_COLOR;340340+327341 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {342342+ /*343343+ * toggle ordered color344344+ */345345+ q->ordcolor ^= 1;346346+328347 /*329348 * barriers implicitly indicate back insertion330349 */···396393 q->elevator->ops->elevator_add_req_fn(q, rq);397394 break;398395396396+ case ELEVATOR_INSERT_REQUEUE:397397+ /*398398+ * If ordered flush isn't in progress, we do front399399+ * insertion; otherwise, requests should be requeued400400+ * in ordseq order.401401+ */402402+ rq->flags |= REQ_SOFTBARRIER;403403+404404+ if (q->ordseq == 0) {405405+ list_add(&rq->queuelist, &q->queue_head);406406+ break;407407+ }408408+409409+ ordseq = blk_ordered_req_seq(rq);410410+411411+ list_for_each(pos, &q->queue_head) {412412+ struct request *pos_rq = list_entry_rq(pos);413413+ if (ordseq <= blk_ordered_req_seq(pos_rq))414414+ break;415415+ }416416+417417+ list_add_tail(&rq->queuelist, pos);418418+ break;419419+399420 default:400421 printk(KERN_ERR "%s: bad insertion point %d\n",401422 __FUNCTION__, where);···449422{450423 struct request *rq;451424452452- if (unlikely(list_empty(&q->queue_head) &&453453- !q->elevator->ops->elevator_dispatch_fn(q, 0)))454454- return NULL;425425+ while (1) {426426+ while (!list_empty(&q->queue_head)) {427427+ rq = list_entry_rq(q->queue_head.next);428428+ if (blk_do_ordered(q, &rq))429429+ return rq;430430+ }455431456456- rq = list_entry_rq(q->queue_head.next);457457-458458- /*459459- * if this is a barrier write and the device has to issue a460460- * flush sequence to support it, check how far we are461461- */462462- if (blk_fs_request(rq) && blk_barrier_rq(rq)) {463463- BUG_ON(q->ordered == QUEUE_ORDERED_NONE);464464-465465- if (q->ordered == QUEUE_ORDERED_FLUSH &&466466- !blk_barrier_preflush(rq))467467- rq = blk_start_pre_flush(q, rq);432432+ if (!q->elevator->ops->elevator_dispatch_fn(q, 0))433433+ return NULL;468434 }469469-470470- return rq;471435}472436473437struct request *elv_next_request(request_queue_t *q)···516498 blkdev_dequeue_request(rq);517499 rq->flags |= REQ_QUIET;518500 end_that_request_chunk(rq, 0, nr_bytes);519519- end_that_request_last(rq);501501+ end_that_request_last(rq, 0);520502 } else {521503 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,522504 ret);···611593 * request is released from the driver, io must be done612594 */613595 if (blk_account_rq(rq)) {596596+ struct request *first_rq = list_entry_rq(q->queue_head.next);597597+614598 q->in_flight--;599599+600600+ /*601601+ * Check if the queue is waiting for fs requests to be602602+ * drained for flush sequence.603603+ */604604+ if (q->ordseq && q->in_flight == 0 &&605605+ blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&606606+ blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {607607+ blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);608608+ q->request_fn(q);609609+ }610610+615611 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)616612 e->ops->elevator_completed_req_fn(q, rq);617613 }
+339-221
block/ll_rw_blk.c
···3636static void blk_unplug_work(void *data);3737static void blk_unplug_timeout(unsigned long data);3838static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);3939+static void init_request_from_bio(struct request *req, struct bio *bio);4040+static int __make_request(request_queue_t *q, struct bio *bio);39414042/*4143 * For the allocated request tables···290288291289/**292290 * blk_queue_ordered - does this queue support ordered writes293293- * @q: the request queue294294- * @flag: see below291291+ * @q: the request queue292292+ * @ordered: one of QUEUE_ORDERED_*295293 *296294 * Description:297295 * For journalled file systems, doing ordered writes on a commit···300298 * feature should call this function and indicate so.301299 *302300 **/303303-void blk_queue_ordered(request_queue_t *q, int flag)301301+int blk_queue_ordered(request_queue_t *q, unsigned ordered,302302+ prepare_flush_fn *prepare_flush_fn)304303{305305- switch (flag) {306306- case QUEUE_ORDERED_NONE:307307- if (q->flush_rq)308308- kmem_cache_free(request_cachep, q->flush_rq);309309- q->flush_rq = NULL;310310- q->ordered = flag;311311- break;312312- case QUEUE_ORDERED_TAG:313313- q->ordered = flag;314314- break;315315- case QUEUE_ORDERED_FLUSH:316316- q->ordered = flag;317317- if (!q->flush_rq)318318- q->flush_rq = kmem_cache_alloc(request_cachep,319319- GFP_KERNEL);320320- break;321321- default:322322- printk("blk_queue_ordered: bad value %d\n", flag);323323- break;304304+ if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&305305+ prepare_flush_fn == NULL) {306306+ printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");307307+ return -EINVAL;324308 }309309+310310+ if (ordered != QUEUE_ORDERED_NONE &&311311+ ordered != QUEUE_ORDERED_DRAIN &&312312+ ordered != QUEUE_ORDERED_DRAIN_FLUSH &&313313+ ordered != QUEUE_ORDERED_DRAIN_FUA &&314314+ ordered != QUEUE_ORDERED_TAG &&315315+ ordered != QUEUE_ORDERED_TAG_FLUSH &&316316+ ordered != QUEUE_ORDERED_TAG_FUA) {317317+ printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);318318+ return -EINVAL;319319+ }320320+321321+ q->next_ordered = ordered;322322+ q->prepare_flush_fn = prepare_flush_fn;323323+324324+ return 0;325325}326326327327EXPORT_SYMBOL(blk_queue_ordered);···348344/*349345 * Cache flushing for ordered writes handling350346 */351351-static void blk_pre_flush_end_io(struct request *flush_rq)347347+inline unsigned blk_ordered_cur_seq(request_queue_t *q)352348{353353- struct request *rq = flush_rq->end_io_data;349349+ if (!q->ordseq)350350+ return 0;351351+ return 1 << ffz(q->ordseq);352352+}353353+354354+unsigned blk_ordered_req_seq(struct request *rq)355355+{354356 request_queue_t *q = rq->q;355357356356- elv_completed_request(q, flush_rq);358358+ BUG_ON(q->ordseq == 0);357359358358- rq->flags |= REQ_BAR_PREFLUSH;360360+ if (rq == &q->pre_flush_rq)361361+ return QUEUE_ORDSEQ_PREFLUSH;362362+ if (rq == &q->bar_rq)363363+ return QUEUE_ORDSEQ_BAR;364364+ if (rq == &q->post_flush_rq)365365+ return QUEUE_ORDSEQ_POSTFLUSH;359366360360- if (!flush_rq->errors)361361- elv_requeue_request(q, rq);362362- else {363363- q->end_flush_fn(q, flush_rq);364364- clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);365365- q->request_fn(q);366366- }367367+ if ((rq->flags & REQ_ORDERED_COLOR) ==368368+ (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))369369+ return QUEUE_ORDSEQ_DRAIN;370370+ else371371+ return QUEUE_ORDSEQ_DONE;367372}368373369369-static void blk_post_flush_end_io(struct request *flush_rq)374374+void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)370375{371371- struct request *rq = flush_rq->end_io_data;372372- request_queue_t *q = rq->q;376376+ struct request *rq;377377+ int uptodate;373378374374- elv_completed_request(q, flush_rq);379379+ if (error && !q->orderr)380380+ q->orderr = error;375381376376- rq->flags |= REQ_BAR_POSTFLUSH;382382+ BUG_ON(q->ordseq & seq);383383+ q->ordseq |= seq;377384378378- q->end_flush_fn(q, flush_rq);379379- clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);380380- q->request_fn(q);381381-}382382-383383-struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)384384-{385385- struct request *flush_rq = q->flush_rq;386386-387387- BUG_ON(!blk_barrier_rq(rq));388388-389389- if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))390390- return NULL;391391-392392- rq_init(q, flush_rq);393393- flush_rq->elevator_private = NULL;394394- flush_rq->flags = REQ_BAR_FLUSH;395395- flush_rq->rq_disk = rq->rq_disk;396396- flush_rq->rl = NULL;385385+ if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)386386+ return;397387398388 /*399399- * prepare_flush returns 0 if no flush is needed, just mark both400400- * pre and post flush as done in that case389389+ * Okay, sequence complete.401390 */402402- if (!q->prepare_flush_fn(q, flush_rq)) {403403- rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;404404- clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);405405- return rq;391391+ rq = q->orig_bar_rq;392392+ uptodate = q->orderr ? q->orderr : 1;393393+394394+ q->ordseq = 0;395395+396396+ end_that_request_first(rq, uptodate, rq->hard_nr_sectors);397397+ end_that_request_last(rq, uptodate);398398+}399399+400400+static void pre_flush_end_io(struct request *rq, int error)401401+{402402+ elv_completed_request(rq->q, rq);403403+ blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);404404+}405405+406406+static void bar_end_io(struct request *rq, int error)407407+{408408+ elv_completed_request(rq->q, rq);409409+ blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);410410+}411411+412412+static void post_flush_end_io(struct request *rq, int error)413413+{414414+ elv_completed_request(rq->q, rq);415415+ blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);416416+}417417+418418+static void queue_flush(request_queue_t *q, unsigned which)419419+{420420+ struct request *rq;421421+ rq_end_io_fn *end_io;422422+423423+ if (which == QUEUE_ORDERED_PREFLUSH) {424424+ rq = &q->pre_flush_rq;425425+ end_io = pre_flush_end_io;426426+ } else {427427+ rq = &q->post_flush_rq;428428+ end_io = post_flush_end_io;406429 }430430+431431+ rq_init(q, rq);432432+ rq->flags = REQ_HARDBARRIER;433433+ rq->elevator_private = NULL;434434+ rq->rq_disk = q->bar_rq.rq_disk;435435+ rq->rl = NULL;436436+ rq->end_io = end_io;437437+ q->prepare_flush_fn(q, rq);438438+439439+ __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);440440+}441441+442442+static inline struct request *start_ordered(request_queue_t *q,443443+ struct request *rq)444444+{445445+ q->bi_size = 0;446446+ q->orderr = 0;447447+ q->ordered = q->next_ordered;448448+ q->ordseq |= QUEUE_ORDSEQ_STARTED;407449408450 /*409409- * some drivers dequeue requests right away, some only after io410410- * completion. make sure the request is dequeued.451451+ * Prep proxy barrier request.411452 */412412- if (!list_empty(&rq->queuelist))413413- blkdev_dequeue_request(rq);453453+ blkdev_dequeue_request(rq);454454+ q->orig_bar_rq = rq;455455+ rq = &q->bar_rq;456456+ rq_init(q, rq);457457+ rq->flags = bio_data_dir(q->orig_bar_rq->bio);458458+ rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;459459+ rq->elevator_private = NULL;460460+ rq->rl = NULL;461461+ init_request_from_bio(rq, q->orig_bar_rq->bio);462462+ rq->end_io = bar_end_io;414463415415- flush_rq->end_io_data = rq;416416- flush_rq->end_io = blk_pre_flush_end_io;464464+ /*465465+ * Queue ordered sequence. As we stack them at the head, we466466+ * need to queue in reverse order. Note that we rely on that467467+ * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs468468+ * request gets inbetween ordered sequence.469469+ */470470+ if (q->ordered & QUEUE_ORDERED_POSTFLUSH)471471+ queue_flush(q, QUEUE_ORDERED_POSTFLUSH);472472+ else473473+ q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;417474418418- __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);419419- return flush_rq;475475+ __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);476476+477477+ if (q->ordered & QUEUE_ORDERED_PREFLUSH) {478478+ queue_flush(q, QUEUE_ORDERED_PREFLUSH);479479+ rq = &q->pre_flush_rq;480480+ } else481481+ q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;482482+483483+ if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)484484+ q->ordseq |= QUEUE_ORDSEQ_DRAIN;485485+ else486486+ rq = NULL;487487+488488+ return rq;420489}421490422422-static void blk_start_post_flush(request_queue_t *q, struct request *rq)491491+int blk_do_ordered(request_queue_t *q, struct request **rqp)423492{424424- struct request *flush_rq = q->flush_rq;493493+ struct request *rq = *rqp, *allowed_rq;494494+ int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);425495426426- BUG_ON(!blk_barrier_rq(rq));496496+ if (!q->ordseq) {497497+ if (!is_barrier)498498+ return 1;427499428428- rq_init(q, flush_rq);429429- flush_rq->elevator_private = NULL;430430- flush_rq->flags = REQ_BAR_FLUSH;431431- flush_rq->rq_disk = rq->rq_disk;432432- flush_rq->rl = NULL;433433-434434- if (q->prepare_flush_fn(q, flush_rq)) {435435- flush_rq->end_io_data = rq;436436- flush_rq->end_io = blk_post_flush_end_io;437437-438438- __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);439439- q->request_fn(q);500500+ if (q->next_ordered != QUEUE_ORDERED_NONE) {501501+ *rqp = start_ordered(q, rq);502502+ return 1;503503+ } else {504504+ /*505505+ * This can happen when the queue switches to506506+ * ORDERED_NONE while this request is on it.507507+ */508508+ blkdev_dequeue_request(rq);509509+ end_that_request_first(rq, -EOPNOTSUPP,510510+ rq->hard_nr_sectors);511511+ end_that_request_last(rq, -EOPNOTSUPP);512512+ *rqp = NULL;513513+ return 0;514514+ }440515 }441441-}442516443443-static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,444444- int sectors)445445-{446446- if (sectors > rq->nr_sectors)447447- sectors = rq->nr_sectors;448448-449449- rq->nr_sectors -= sectors;450450- return rq->nr_sectors;451451-}452452-453453-static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,454454- int sectors, int queue_locked)455455-{456456- if (q->ordered != QUEUE_ORDERED_FLUSH)457457- return 0;458458- if (!blk_fs_request(rq) || !blk_barrier_rq(rq))459459- return 0;460460- if (blk_barrier_postflush(rq))461461- return 0;462462-463463- if (!blk_check_end_barrier(q, rq, sectors)) {464464- unsigned long flags = 0;465465-466466- if (!queue_locked)467467- spin_lock_irqsave(q->queue_lock, flags);468468-469469- blk_start_post_flush(q, rq);470470-471471- if (!queue_locked)472472- spin_unlock_irqrestore(q->queue_lock, flags);517517+ if (q->ordered & QUEUE_ORDERED_TAG) {518518+ if (is_barrier && rq != &q->bar_rq)519519+ *rqp = NULL;520520+ return 1;473521 }522522+523523+ switch (blk_ordered_cur_seq(q)) {524524+ case QUEUE_ORDSEQ_PREFLUSH:525525+ allowed_rq = &q->pre_flush_rq;526526+ break;527527+ case QUEUE_ORDSEQ_BAR:528528+ allowed_rq = &q->bar_rq;529529+ break;530530+ case QUEUE_ORDSEQ_POSTFLUSH:531531+ allowed_rq = &q->post_flush_rq;532532+ break;533533+ default:534534+ allowed_rq = NULL;535535+ break;536536+ }537537+538538+ if (rq != allowed_rq &&539539+ (blk_fs_request(rq) || rq == &q->pre_flush_rq ||540540+ rq == &q->post_flush_rq))541541+ *rqp = NULL;474542475543 return 1;476544}477545478478-/**479479- * blk_complete_barrier_rq - complete possible barrier request480480- * @q: the request queue for the device481481- * @rq: the request482482- * @sectors: number of sectors to complete483483- *484484- * Description:485485- * Used in driver end_io handling to determine whether to postpone486486- * completion of a barrier request until a post flush has been done. This487487- * is the unlocked variant, used if the caller doesn't already hold the488488- * queue lock.489489- **/490490-int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)546546+static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)491547{492492- return __blk_complete_barrier_rq(q, rq, sectors, 0);493493-}494494-EXPORT_SYMBOL(blk_complete_barrier_rq);548548+ request_queue_t *q = bio->bi_private;549549+ struct bio_vec *bvec;550550+ int i;495551496496-/**497497- * blk_complete_barrier_rq_locked - complete possible barrier request498498- * @q: the request queue for the device499499- * @rq: the request500500- * @sectors: number of sectors to complete501501- *502502- * Description:503503- * See blk_complete_barrier_rq(). This variant must be used if the caller504504- * holds the queue lock.505505- **/506506-int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,507507- int sectors)508508-{509509- return __blk_complete_barrier_rq(q, rq, sectors, 1);552552+ /*553553+ * This is dry run, restore bio_sector and size. We'll finish554554+ * this request again with the original bi_end_io after an555555+ * error occurs or post flush is complete.556556+ */557557+ q->bi_size += bytes;558558+559559+ if (bio->bi_size)560560+ return 1;561561+562562+ /* Rewind bvec's */563563+ bio->bi_idx = 0;564564+ bio_for_each_segment(bvec, bio, i) {565565+ bvec->bv_len += bvec->bv_offset;566566+ bvec->bv_offset = 0;567567+ }568568+569569+ /* Reset bio */570570+ set_bit(BIO_UPTODATE, &bio->bi_flags);571571+ bio->bi_size = q->bi_size;572572+ bio->bi_sector -= (q->bi_size >> 9);573573+ q->bi_size = 0;574574+575575+ return 0;510576}511511-EXPORT_SYMBOL(blk_complete_barrier_rq_locked);577577+578578+static inline int ordered_bio_endio(struct request *rq, struct bio *bio,579579+ unsigned int nbytes, int error)580580+{581581+ request_queue_t *q = rq->q;582582+ bio_end_io_t *endio;583583+ void *private;584584+585585+ if (&q->bar_rq != rq)586586+ return 0;587587+588588+ /*589589+ * Okay, this is the barrier request in progress, dry finish it.590590+ */591591+ if (error && !q->orderr)592592+ q->orderr = error;593593+594594+ endio = bio->bi_end_io;595595+ private = bio->bi_private;596596+ bio->bi_end_io = flush_dry_bio_endio;597597+ bio->bi_private = q;598598+599599+ bio_endio(bio, nbytes, error);600600+601601+ bio->bi_end_io = endio;602602+ bio->bi_private = private;603603+604604+ return 1;605605+}512606513607/**514608 * blk_queue_bounce_limit - set bounce buffer limit for queue···1141103911421040EXPORT_SYMBOL(blk_queue_invalidate_tags);1143104111441144-static char *rq_flags[] = {10421042+static const char * const rq_flags[] = {11451043 "REQ_RW",11461044 "REQ_FAILFAST",11471045 "REQ_SORTED",11481046 "REQ_SOFTBARRIER",11491047 "REQ_HARDBARRIER",10481048+ "REQ_FUA",11501049 "REQ_CMD",11511050 "REQ_NOMERGE",11521051 "REQ_STARTED",···11671064 "REQ_PM_SUSPEND",11681065 "REQ_PM_RESUME",11691066 "REQ_PM_SHUTDOWN",10671067+ "REQ_ORDERED_COLOR",11701068};1171106911721070void blk_dump_rq_flags(struct request *rq, char *msg)···17451641 if (q->queue_tags)17461642 __blk_queue_free_tags(q);1747164317481748- blk_queue_ordered(q, QUEUE_ORDERED_NONE);17491749-17501644 kmem_cache_free(requestq_cachep, q);17511645}17521646···1768166617691667 return 0;17701668}17711771-17721772-static int __make_request(request_queue_t *, struct bio *);1773166917741670request_queue_t *blk_alloc_queue(gfp_t gfp_mask)17751671{···20081908{20091909 struct request *rq = NULL;20101910 struct request_list *rl = &q->rq;20112011- struct io_context *ioc = current_io_context(GFP_ATOMIC);20122012- int priv;19111911+ struct io_context *ioc = NULL;19121912+ int may_queue, priv;2013191320142014- if (rl->count[rw]+1 >= q->nr_requests) {20152015- /*20162016- * The queue will fill after this allocation, so set it as20172017- * full, and mark this process as "batching". This process20182018- * will be allowed to complete a batch of requests, others20192019- * will be blocked.20202020- */20212021- if (!blk_queue_full(q, rw)) {20222022- ioc_set_batching(q, ioc);20232023- blk_set_queue_full(q, rw);19141914+ may_queue = elv_may_queue(q, rw, bio);19151915+ if (may_queue == ELV_MQUEUE_NO)19161916+ goto rq_starved;19171917+19181918+ if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {19191919+ if (rl->count[rw]+1 >= q->nr_requests) {19201920+ ioc = current_io_context(GFP_ATOMIC);19211921+ /*19221922+ * The queue will fill after this allocation, so set19231923+ * it as full, and mark this process as "batching".19241924+ * This process will be allowed to complete a batch of19251925+ * requests, others will be blocked.19261926+ */19271927+ if (!blk_queue_full(q, rw)) {19281928+ ioc_set_batching(q, ioc);19291929+ blk_set_queue_full(q, rw);19301930+ } else {19311931+ if (may_queue != ELV_MQUEUE_MUST19321932+ && !ioc_batching(q, ioc)) {19331933+ /*19341934+ * The queue is full and the allocating19351935+ * process is not a "batcher", and not19361936+ * exempted by the IO scheduler19371937+ */19381938+ goto out;19391939+ }19401940+ }20241941 }19421942+ set_queue_congested(q, rw);20251943 }2026194420272027- switch (elv_may_queue(q, rw, bio)) {20282028- case ELV_MQUEUE_NO:20292029- goto rq_starved;20302030- case ELV_MQUEUE_MAY:20312031- break;20322032- case ELV_MQUEUE_MUST:20332033- goto get_rq;20342034- }20352035-20362036- if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {20372037- /*20382038- * The queue is full and the allocating process is not a20392039- * "batcher", and not exempted by the IO scheduler20402040- */20412041- goto out;20422042- }20432043-20442044-get_rq:20451945 /*20461946 * Only allow batching queuers to allocate up to 50% over the defined20471947 * limit of requests, otherwise we could have thousands of requests···2052195220531953 rl->count[rw]++;20541954 rl->starved[rw] = 0;20552055- if (rl->count[rw] >= queue_congestion_on_threshold(q))20562056- set_queue_congested(q, rw);2057195520581956 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);20591957 if (priv)···20601962 spin_unlock_irq(q->queue_lock);2061196320621964 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);20632063- if (!rq) {19651965+ if (unlikely(!rq)) {20641966 /*20651967 * Allocation failed presumably due to memory. Undo anything20661968 * we might have messed up.···20851987 goto out;20861988 }2087198919901990+ /*19911991+ * ioc may be NULL here, and ioc_batching will be false. That's19921992+ * OK, if the queue is under the request limit then requests need19931993+ * not count toward the nr_batch_requests limit. There will always19941994+ * be some limit enforced by BLK_BATCH_TIME.19951995+ */20881996 if (ioc_batching(q, ioc))20891997 ioc->nr_batch_requests--;20901998···24172313 */24182314void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,24192315 struct request *rq, int at_head,24202420- void (*done)(struct request *))23162316+ rq_end_io_fn *done)24212317{24222318 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;24232319···26212517 * blk_end_sync_rq - executes a completion event on a request26222518 * @rq: request to complete26232519 */26242624-void blk_end_sync_rq(struct request *rq)25202520+void blk_end_sync_rq(struct request *rq, int error)26252521{26262522 struct completion *waiting = rq->waiting;26272523···2759265527602656EXPORT_SYMBOL(blk_attempt_remerge);2761265726582658+static void init_request_from_bio(struct request *req, struct bio *bio)26592659+{26602660+ req->flags |= REQ_CMD;26612661+26622662+ /*26632663+ * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)26642664+ */26652665+ if (bio_rw_ahead(bio) || bio_failfast(bio))26662666+ req->flags |= REQ_FAILFAST;26672667+26682668+ /*26692669+ * REQ_BARRIER implies no merging, but lets make it explicit26702670+ */26712671+ if (unlikely(bio_barrier(bio)))26722672+ req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);26732673+26742674+ req->errors = 0;26752675+ req->hard_sector = req->sector = bio->bi_sector;26762676+ req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);26772677+ req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);26782678+ req->nr_phys_segments = bio_phys_segments(req->q, bio);26792679+ req->nr_hw_segments = bio_hw_segments(req->q, bio);26802680+ req->buffer = bio_data(bio); /* see ->buffer comment above */26812681+ req->waiting = NULL;26822682+ req->bio = req->biotail = bio;26832683+ req->ioprio = bio_prio(bio);26842684+ req->rq_disk = bio->bi_bdev->bd_disk;26852685+ req->start_time = jiffies;26862686+}26872687+27622688static int __make_request(request_queue_t *q, struct bio *bio)27632689{27642690 struct request *req;···28142680 spin_lock_prefetch(q->queue_lock);2815268128162682 barrier = bio_barrier(bio);28172817- if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {26832683+ if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {28182684 err = -EOPNOTSUPP;28192685 goto end_io;28202686 }···28842750 * We don't worry about that case for efficiency. It won't happen28852751 * often, and the elevators are able to handle it.28862752 */28872887-28882888- req->flags |= REQ_CMD;28892889-28902890- /*28912891- * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)28922892- */28932893- if (bio_rw_ahead(bio) || bio_failfast(bio))28942894- req->flags |= REQ_FAILFAST;28952895-28962896- /*28972897- * REQ_BARRIER implies no merging, but lets make it explicit28982898- */28992899- if (unlikely(barrier))29002900- req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);29012901-29022902- req->errors = 0;29032903- req->hard_sector = req->sector = sector;29042904- req->hard_nr_sectors = req->nr_sectors = nr_sectors;29052905- req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;29062906- req->nr_phys_segments = bio_phys_segments(q, bio);29072907- req->nr_hw_segments = bio_hw_segments(q, bio);29082908- req->buffer = bio_data(bio); /* see ->buffer comment above */29092909- req->waiting = NULL;29102910- req->bio = req->biotail = bio;29112911- req->ioprio = prio;29122912- req->rq_disk = bio->bi_bdev->bd_disk;29132913- req->start_time = jiffies;27532753+ init_request_from_bio(req, bio);2914275429152755 spin_lock_irq(q->queue_lock);29162756 if (elv_queue_empty(q))···31753067 if (nr_bytes >= bio->bi_size) {31763068 req->bio = bio->bi_next;31773069 nbytes = bio->bi_size;31783178- bio_endio(bio, nbytes, error);30703070+ if (!ordered_bio_endio(req, bio, nbytes, error))30713071+ bio_endio(bio, nbytes, error);31793072 next_idx = 0;31803073 bio_nbytes = 0;31813074 } else {···32313122 * if the request wasn't completed, update state32323123 */32333124 if (bio_nbytes) {32343234- bio_endio(bio, bio_nbytes, error);31253125+ if (!ordered_bio_endio(req, bio, bio_nbytes, error))31263126+ bio_endio(bio, bio_nbytes, error);32353127 bio->bi_idx += next_idx;32363128 bio_iovec(bio)->bv_offset += nr_bytes;32373129 bio_iovec(bio)->bv_len -= nr_bytes;···32893179/*32903180 * queue lock must be held32913181 */32923292-void end_that_request_last(struct request *req)31823182+void end_that_request_last(struct request *req, int uptodate)32933183{32943184 struct gendisk *disk = req->rq_disk;31853185+ int error;31863186+31873187+ /*31883188+ * extend uptodate bool to allow < 0 value to be direct io error31893189+ */31903190+ error = 0;31913191+ if (end_io_error(uptodate))31923192+ error = !uptodate ? -EIO : uptodate;3295319332963194 if (unlikely(laptop_mode) && blk_fs_request(req))32973195 laptop_io_completion();···33143196 disk->in_flight--;33153197 }33163198 if (req->end_io)33173317- req->end_io(req);31993199+ req->end_io(req, error);33183200 else33193201 __blk_put_request(req->q, req);33203202}···33263208 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {33273209 add_disk_randomness(req->rq_disk);33283210 blkdev_dequeue_request(req);33293329- end_that_request_last(req);32113211+ end_that_request_last(req, uptodate);33303212 }33313213}33323214
+1-1
block/scsi_ioctl.c
···46464747static int sg_get_version(int __user *p)4848{4949- static int sg_version_num = 30527;4949+ static const int sg_version_num = 30527;5050 return put_user(sg_version_num, p);5151}5252
+1-1
drivers/block/DAC960.c
···3471347134723472 if (!end_that_request_first(Request, UpToDate, Command->BlockCount)) {3473347334743474- end_that_request_last(Request);34743474+ end_that_request_last(Request, UpToDate);3475347534763476 if (Command->Completion) {34773477 complete(Command->Completion);
+1-1
drivers/block/cciss.c
···23102310 printk("Done with %p\n", cmd->rq);23112311#endif /* CCISS_DEBUG */ 2312231223132313- end_that_request_last(cmd->rq);23132313+ end_that_request_last(cmd->rq, status ? 1 : -EIO);23142314 cmd_free(h,cmd,1);23152315}23162316
+1-1
drivers/block/cpqarray.c
···10361036 complete_buffers(cmd->rq->bio, ok);1037103710381038 DBGPX(printk("Done with %p\n", cmd->rq););10391039- end_that_request_last(cmd->rq);10391039+ end_that_request_last(cmd->rq, ok ? 1 : -EIO);10401040}1041104110421042/*
+1-1
drivers/block/floppy.c
···23012301 add_disk_randomness(req->rq_disk);23022302 floppy_off((long)req->rq_disk->private_data);23032303 blkdev_dequeue_request(req);23042304- end_that_request_last(req);23042304+ end_that_request_last(req, uptodate);2305230523062306 /* We're done with the request */23072307 current_req = NULL;
···325325 if (unlikely(bio_flagged(bio, BIO_CLONED)))326326 return 0;327327328328- if (bio->bi_vcnt >= bio->bi_max_vecs)328328+ if (((bio->bi_size + len) >> 9) > max_sectors)329329 return 0;330330331331- if (((bio->bi_size + len) >> 9) > max_sectors)331331+ /*332332+ * For filesystems with a blocksize smaller than the pagesize333333+ * we will often be called with the same page as last time and334334+ * a consecutive offset. Optimize this special case.335335+ */336336+ if (bio->bi_vcnt > 0) {337337+ struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];338338+339339+ if (page == prev->bv_page &&340340+ offset == prev->bv_offset + prev->bv_len) {341341+ prev->bv_len += len;342342+ if (q->merge_bvec_fn &&343343+ q->merge_bvec_fn(q, bio, prev) < len) {344344+ prev->bv_len -= len;345345+ return 0;346346+ }347347+348348+ goto done;349349+ }350350+ }351351+352352+ if (bio->bi_vcnt >= bio->bi_max_vecs)332353 return 0;333354334355 /*···403382 bio->bi_vcnt++;404383 bio->bi_phys_segments++;405384 bio->bi_hw_segments++;385385+ done:406386 bio->bi_size += len;407387 return len;408388}