// SPDX-License-Identifier: GPL-2.0
/*
 * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
 * for the blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/sbitmap.h>

#include <trace/events/block.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"

/*
 * See Documentation/block/deadline-iosched.rst
 */
static const int read_expire = HZ / 2;	/* max time before a read is submitted. */
static const int write_expire = 5 * HZ;	/* ditto for writes, these limits are SOFT! */
/*
 * Time after which to dispatch lower priority requests even if higher
 * priority requests are pending.
 */
static const int prio_aging_expire = 10 * HZ;
static const int writes_starved = 2;	/* max times reads can starve a write */
static const int fifo_batch = 16;	/* # of sequential requests treated as one
					   by the above parameters. For throughput. */
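
/*
 * Rough feel for the defaults above (illustrative only, not part of the
 * original comments): with CONFIG_HZ=1000, read_expire is 500 ms,
 * write_expire is 5000 ms and prio_aging_expire is 10000 ms. With
 * CONFIG_HZ=250 the jiffies counts differ but the wall-clock targets are the
 * same. All of these are soft targets, not hard guarantees.
 */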

enum dd_data_dir {
	DD_READ		= READ,
	DD_WRITE	= WRITE,
};

enum { DD_DIR_COUNT = 2 };

enum dd_prio {
	DD_RT_PRIO	= 0,
	DD_BE_PRIO	= 1,
	DD_IDLE_PRIO	= 2,
	DD_PRIO_MAX	= 2,
};

enum { DD_PRIO_COUNT = 3 };

/*
 * I/O statistics per I/O priority. It is fine if these counters overflow.
 * What matters is that these counters are at least as wide as
 * log2(max_outstanding_requests).
 */
struct io_stats_per_prio {
	uint32_t inserted;
	uint32_t merged;
	uint32_t dispatched;
	atomic_t completed;
};

/*
 * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
 * present on both sort_list[] and fifo_list[].
 */
struct dd_per_prio {
	struct rb_root sort_list[DD_DIR_COUNT];
	struct list_head fifo_list[DD_DIR_COUNT];
	/* Position of the most recently dispatched request. */
	sector_t latest_pos[DD_DIR_COUNT];
	struct io_stats_per_prio stats;
};

struct deadline_data {
	/*
	 * run time data
	 */

	struct list_head dispatch;
	struct dd_per_prio per_prio[DD_PRIO_COUNT];

	/* Data direction of latest dispatched request. */
	enum dd_data_dir last_dir;
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[DD_DIR_COUNT];
	int fifo_batch;
	int writes_starved;
	int front_merges;
	u32 async_depth;
	int prio_aging_expire;

	spinlock_t lock;
};

/* Maps an I/O priority class to a deadline scheduler priority. */
static const enum dd_prio ioprio_class_to_prio[] = {
	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
};
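
/*
 * Illustrative only: a request tagged IOPRIO_CLASS_RT (e.g. submitted by a
 * task run under "ionice -c 1") maps to DD_RT_PRIO here, IOPRIO_CLASS_IDLE
 * maps to DD_IDLE_PRIO, and everything else, including requests with no
 * explicit priority, is treated as best-effort via DD_BE_PRIO. The per-class
 * priority *level* (0-7) is not consulted by this scheduler; only the class
 * matters.
 */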

static inline struct rb_root *
deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
{
	return &per_prio->sort_list[rq_data_dir(rq)];
}

/*
 * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
 * request.
 */
static u8 dd_rq_ioclass(struct request *rq)
{
	return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
}

/*
 * Return the first request for which blk_rq_pos() >= @pos.
 */
static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
				enum dd_data_dir data_dir, sector_t pos)
{
	struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
	struct request *rq, *res = NULL;

	while (node) {
		rq = rb_entry_rq(node);
		if (blk_rq_pos(rq) >= pos) {
			res = rq;
			node = node->rb_left;
		} else {
			node = node->rb_right;
		}
	}
	return res;
}
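
/*
 * Worked example (illustrative only): with requests at sectors 8, 64 and 128
 * in the tree and @pos == 70, the walk remembers 128 as the best candidate
 * when it descends left past it and discards 8 and 64 when it descends
 * right, so the request at sector 128 is returned. With @pos == 200 no
 * request satisfies blk_rq_pos() >= pos and NULL is returned, which callers
 * treat as "we have run out of higher-sectored requests".
 */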

static void
deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(per_prio, rq);

	elv_rb_add(root, rq);
}

static inline void
deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	elv_rb_del(deadline_rb_root(per_prio, rq), rq);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q,
				    struct dd_per_prio *per_prio,
				    struct request *rq)
{
	list_del_init(&rq->queuelist);

	/*
	 * We might not be on the rbtree, if we are doing an insert merge
	 */
	if (!RB_EMPTY_NODE(&rq->rb_node))
		deadline_del_rq_rb(per_prio, rq);

	elv_rqhash_del(q, rq);
	if (q->last_merge == rq)
		q->last_merge = NULL;
}

static void dd_request_merged(struct request_queue *q, struct request *req,
			      enum elv_merge type)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = dd_rq_ioclass(req);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_per_prio *per_prio = &dd->per_prio[prio];

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(deadline_rb_root(per_prio, req), req);
		deadline_add_rq_rb(per_prio, req);
	}
}

/*
 * Callback function that is invoked after @next has been merged into @req.
 */
static void dd_merged_requests(struct request_queue *q, struct request *req,
			       struct request *next)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = dd_rq_ioclass(next);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];

	lockdep_assert_held(&dd->lock);

	dd->per_prio[prio].stats.merged++;

	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time,
				(unsigned long)req->fifo_time)) {
			list_move(&req->queuelist, &next->queuelist);
			req->fifo_time = next->fifo_time;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, &dd->per_prio[prio], next);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      struct request *rq)
{
	/*
	 * take it off the sort and fifo list
	 */
	deadline_remove_request(rq->q, per_prio, rq);
}

/* Number of requests queued for a given priority level. */
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
{
	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;

	lockdep_assert_held(&dd->lock);

	return stats->inserted - atomic_read(&stats->completed);
}

/*
 * deadline_check_fifo returns true if and only if there are expired requests
 * in the FIFO list. Requires !list_empty(&dd->fifo_list[data_dir]).
 */
static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
				       enum dd_data_dir data_dir)
{
	struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);

	return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
}

/*
 * For the specified data direction, return the next request to
 * dispatch using arrival ordered lists.
 */
static struct request *
deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      enum dd_data_dir data_dir)
{
	if (list_empty(&per_prio->fifo_list[data_dir]))
		return NULL;

	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
}

/*
 * For the specified data direction, return the next request to
 * dispatch using sector position sorted lists.
 */
static struct request *
deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      enum dd_data_dir data_dir)
{
	return deadline_from_pos(per_prio, data_dir,
				 per_prio->latest_pos[data_dir]);
}

/*
 * Returns true if and only if @rq started after @latest_start where
 * @latest_start is in jiffies.
 */
static bool started_after(struct deadline_data *dd, struct request *rq,
			  unsigned long latest_start)
{
	unsigned long start_time = (unsigned long)rq->fifo_time;

	start_time -= dd->fifo_expire[rq_data_dir(rq)];

	return time_after(start_time, latest_start);
}
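
/*
 * Illustrative arithmetic: for requests on the FIFO lists, rq->fifo_time is
 * set at insertion time to "jiffies + fifo_expire", so subtracting
 * fifo_expire recovers the insertion time. E.g. a read inserted at jiffies
 * 1000 with the default read_expire of HZ/2 (500 with HZ=1000) carries
 * fifo_time 1500; started_after() then compares 1500 - 500 = 1000 against
 * @latest_start. In the priority-aging path latest_start is
 * "now - prio_aging_expire", so __dd_dispatch_request() only dispatches
 * requests that have already waited at least prio_aging_expire jiffies.
 */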

static struct request *dd_start_request(struct deadline_data *dd,
					enum dd_data_dir data_dir,
					struct request *rq)
{
	u8 ioprio_class = dd_rq_ioclass(rq);
	enum dd_prio prio = ioprio_class_to_prio[ioprio_class];

	dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
	dd->per_prio[prio].stats.dispatched++;
	rq->rq_flags |= RQF_STARTED;
	return rq;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
 */
static struct request *__dd_dispatch_request(struct deadline_data *dd,
					     struct dd_per_prio *per_prio,
					     unsigned long latest_start)
{
	struct request *rq, *next_rq;
	enum dd_data_dir data_dir;

	lockdep_assert_held(&dd->lock);

	/*
	 * batches are currently reads XOR writes
	 */
	rq = deadline_next_request(dd, per_prio, dd->last_dir);
	if (rq && dd->batching < dd->fifo_batch) {
		/* we have a next request and are still entitled to batch */
		data_dir = rq_data_dir(rq);
		goto dispatch_request;
	}

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (!list_empty(&per_prio->fifo_list[DD_READ])) {
		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));

		if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
		    (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = DD_READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));

		dd->starved = 0;

		data_dir = DD_WRITE;

		goto dispatch_find_request;
	}

	return NULL;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	next_rq = deadline_next_request(dd, per_prio, data_dir);
	if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
		/*
		 * A deadline has expired, the last request was in the other
		 * direction, or we have run out of higher-sectored requests.
		 * Start again from the request with the earliest expiry time.
		 */
		rq = deadline_fifo_request(dd, per_prio, data_dir);
	} else {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = next_rq;
	}

	if (!rq)
		return NULL;

	dd->last_dir = data_dir;
	dd->batching = 0;

dispatch_request:
	if (started_after(dd, rq, latest_start))
		return NULL;

	/*
	 * rq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, per_prio, rq);
	return dd_start_request(dd, data_dir, rq);
}
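
/*
 * Worked example of the selection logic above (illustrative only, with the
 * default fifo_batch=16 and writes_starved=2): a stream of sequential reads
 * is dispatched as a batch of up to 16 requests before the data direction is
 * reconsidered. If writes are also pending, each new decision in favor of
 * reads increments dd->starved, and once it reaches writes_starved (i.e. on
 * the third consecutive such decision) the scheduler switches to writes and
 * resets the counter. Within the chosen direction an expired FIFO deadline
 * restarts dispatch from the oldest request instead of continuing in sector
 * order.
 */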

/*
 * Check whether there are any requests with priority other than DD_RT_PRIO
 * that were inserted more than prio_aging_expire jiffies ago.
 */
static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
						      unsigned long now)
{
	struct request *rq;
	enum dd_prio prio;
	int prio_cnt;

	lockdep_assert_held(&dd->lock);

	prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
		   !!dd_queued(dd, DD_IDLE_PRIO);
	if (prio_cnt < 2)
		return NULL;

	for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
		rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
					   now - dd->prio_aging_expire);
		if (rq)
			return rq;
	}

	return NULL;
}
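
/*
 * Illustrative only: aging is relevant only when at least two priority
 * levels have requests queued (prio_cnt >= 2). If, say, a steady stream of
 * RT reads would otherwise monopolize the device while a BE write has been
 * waiting longer than the default prio_aging_expire of 10 seconds, this path
 * dispatches that aged BE (or idle-class) request ahead of the strict
 * priority order used further down in dd_dispatch_request().
 */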

/*
 * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
 *
 * One confusing aspect here is that we get called for a specific
 * hardware queue, but we may return a request that is for a
 * different hardware queue. This is because mq-deadline has shared
 * state for all hardware queues, in terms of sorting, FIFOs, etc.
 */
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
	const unsigned long now = jiffies;
	struct request *rq;
	enum dd_prio prio;

	spin_lock(&dd->lock);

	if (!list_empty(&dd->dispatch)) {
		rq = list_first_entry(&dd->dispatch, struct request, queuelist);
		list_del_init(&rq->queuelist);
		dd_start_request(dd, rq_data_dir(rq), rq);
		goto unlock;
	}

	rq = dd_dispatch_prio_aged_requests(dd, now);
	if (rq)
		goto unlock;

	/*
	 * Next, dispatch requests in priority order. Ignore lower priority
	 * requests if any higher priority requests are pending.
	 */
	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
		if (rq || dd_queued(dd, prio))
			break;
	}

unlock:
	spin_unlock(&dd->lock);

	return rq;
}

/*
 * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
 * function is used by __blk_mq_get_tag().
 */
static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
	struct deadline_data *dd = data->q->elevator->elevator_data;

	/* Do not throttle synchronous reads. */
	if (op_is_sync(opf) && !op_is_write(opf))
		return;

	/*
	 * Throttle asynchronous requests and writes such that these requests
	 * do not block the allocation of synchronous requests.
	 */
	data->shallow_depth = dd->async_depth;
}
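
/*
 * Illustrative only: with the defaults set up by dd_depth_updated() below,
 * async_depth equals q->nr_requests, so asynchronous requests and writes are
 * not actually throttled unless the async_depth sysfs attribute is lowered.
 * Setting it to, say, a quarter of nr_requests caps how many scheduler tags
 * async I/O may hold at once, keeping tags available for synchronous reads.
 */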

/* Called by blk_mq_update_nr_requests(). */
static void dd_depth_updated(struct request_queue *q)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	dd->async_depth = q->nr_requests;
	blk_mq_set_min_shallow_depth(q, 1);
}

static void dd_exit_sched(struct elevator_queue *e)
{
	struct deadline_data *dd = e->elevator_data;
	enum dd_prio prio;

	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		struct dd_per_prio *per_prio = &dd->per_prio[prio];
		const struct io_stats_per_prio *stats = &per_prio->stats;
		uint32_t queued;

		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));

		spin_lock(&dd->lock);
		queued = dd_queued(dd, prio);
		spin_unlock(&dd->lock);

		WARN_ONCE(queued != 0,
			  "statistics for priority %d: i %u m %u d %u c %u\n",
			  prio, stats->inserted, stats->merged,
			  stats->dispatched, atomic_read(&stats->completed));
	}

	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
{
	struct deadline_data *dd;
	enum dd_prio prio;

	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd)
		return -ENOMEM;

	eq->elevator_data = dd;

	INIT_LIST_HEAD(&dd->dispatch);
	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		struct dd_per_prio *per_prio = &dd->per_prio[prio];

		INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
		INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
		per_prio->sort_list[DD_READ] = RB_ROOT;
		per_prio->sort_list[DD_WRITE] = RB_ROOT;
	}
	dd->fifo_expire[DD_READ] = read_expire;
	dd->fifo_expire[DD_WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->last_dir = DD_WRITE;
	dd->fifo_batch = fifo_batch;
	dd->prio_aging_expire = prio_aging_expire;
	spin_lock_init(&dd->lock);

	/* We dispatch from request queue wide instead of hw queue */
	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);

	q->elevator = eq;
	dd_depth_updated(q);
	return 0;
}

/*
 * Try to merge @bio into an existing request. If @bio has been merged into
 * an existing request, store the pointer to that request into *@rq.
 */
static int dd_request_merge(struct request_queue *q, struct request **rq,
			    struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_per_prio *per_prio = &dd->per_prio[prio];
	sector_t sector = bio_end_sector(bio);
	struct request *__rq;

	if (!dd->front_merges)
		return ELEVATOR_NO_MERGE;

	__rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
	if (__rq) {
		BUG_ON(sector != blk_rq_pos(__rq));

		if (elv_bio_merge_ok(__rq, bio)) {
			*rq = __rq;
			if (blk_discard_mergable(__rq))
				return ELEVATOR_DISCARD_MERGE;
			return ELEVATOR_FRONT_MERGE;
		}
	}

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to merge a bio into an existing request. This function is called
 * before @bio is associated with a request.
 */
static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
			 unsigned int nr_segs)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *free = NULL;
	bool ret;

	spin_lock(&dd->lock);
	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
	spin_unlock(&dd->lock);

	if (free)
		blk_mq_free_request(free);

	return ret;
}

/*
 * add rq to rbtree and fifo
 */
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
			      blk_insert_t flags, struct list_head *free)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	const enum dd_data_dir data_dir = rq_data_dir(rq);
	u16 ioprio = req_get_ioprio(rq);
	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
	struct dd_per_prio *per_prio;
	enum dd_prio prio;

	lockdep_assert_held(&dd->lock);

	prio = ioprio_class_to_prio[ioprio_class];
	per_prio = &dd->per_prio[prio];
	if (!rq->elv.priv[0])
		per_prio->stats.inserted++;
	rq->elv.priv[0] = per_prio;

	if (blk_mq_sched_try_insert_merge(q, rq, free))
		return;

	trace_block_rq_insert(rq);

	if (flags & BLK_MQ_INSERT_AT_HEAD) {
		list_add(&rq->queuelist, &dd->dispatch);
		rq->fifo_time = jiffies;
	} else {
		deadline_add_rq_rb(per_prio, rq);

		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * set expire time and add to fifo list
		 */
		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
	}
}

/*
 * Called from blk_mq_insert_request() or blk_mq_dispatch_list().
 */
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
			       struct list_head *list,
			       blk_insert_t flags)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	LIST_HEAD(free);

	spin_lock(&dd->lock);
	while (!list_empty(list)) {
		struct request *rq;

		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		dd_insert_request(hctx, rq, flags, &free);
	}
	spin_unlock(&dd->lock);

	blk_mq_free_requests(&free);
}

/* Callback from inside blk_mq_rq_ctx_init(). */
static void dd_prepare_request(struct request *rq)
{
	rq->elv.priv[0] = NULL;
}

/*
 * Callback from inside blk_mq_free_request().
 */
static void dd_finish_request(struct request *rq)
{
	struct dd_per_prio *per_prio = rq->elv.priv[0];

	/*
	 * The block layer core may call dd_finish_request() without having
	 * called dd_insert_requests(). Skip requests that bypassed I/O
	 * scheduling. See also blk_mq_request_bypass_insert().
	 */
	if (per_prio)
		atomic_inc(&per_prio->stats.completed);
}

static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
{
	return !list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
		!list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
}

static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
	enum dd_prio prio;

	if (!list_empty_careful(&dd->dispatch))
		return true;

	for (prio = 0; prio <= DD_PRIO_MAX; prio++)
		if (dd_has_work_for_prio(&dd->per_prio[prio]))
			return true;

	return false;
}

/*
 * sysfs parts below
 */
#define SHOW_INT(__FUNC, __VAR)						\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct deadline_data *dd = e->elevator_data;			\
									\
	return sysfs_emit(page, "%d\n", __VAR);				\
}
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data, __ret;						\
									\
	__ret = kstrtoint(page, 0, &__data);				\
	if (__ret < 0)							\
		return __ret;						\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	*(__PTR) = __CONV(__data);					\
	return count;							\
}
#define STORE_INT(__FUNC, __PTR, MIN, MAX)				\
	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
#define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)				\
	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
#undef STORE_JIFFIES

#define DD_ATTR(name) \
	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

static const struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(async_depth),
	DD_ATTR(fifo_batch),
	DD_ATTR(prio_aging_expire),
	__ATTR_NULL
};
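
/*
 * Illustrative usage (assuming a hypothetical device "sda" that uses this
 * scheduler): the attributes above appear under
 * /sys/block/sda/queue/iosched/, and the *_expire values are read and
 * written in milliseconds, e.g.
 *
 *	echo mq-deadline > /sys/block/sda/queue/scheduler
 *	cat /sys/block/sda/queue/iosched/read_expire	# 500 by default
 *	echo 100 > /sys/block/sda/queue/iosched/read_expire
 *
 * See Documentation/block/deadline-iosched.rst for the full list.
 */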

#ifdef CONFIG_BLK_DEBUG_FS
#define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)		\
static void *deadline_##name##_fifo_start(struct seq_file *m,		\
					  loff_t *pos)			\
	__acquires(&dd->lock)						\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
									\
	spin_lock(&dd->lock);						\
	return seq_list_start(&per_prio->fifo_list[data_dir], *pos);	\
}									\
									\
static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,	\
					 loff_t *pos)			\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
									\
	return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);	\
}									\
									\
static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)	\
	__releases(&dd->lock)						\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
									\
	spin_unlock(&dd->lock);						\
}									\
									\
static const struct seq_operations deadline_##name##_fifo_seq_ops = {	\
	.start	= deadline_##name##_fifo_start,				\
	.next	= deadline_##name##_fifo_next,				\
	.stop	= deadline_##name##_fifo_stop,				\
	.show	= blk_mq_debugfs_rq_show,				\
};									\
									\
static int deadline_##name##_next_rq_show(void *data,			\
					  struct seq_file *m)		\
{									\
	struct request_queue *q = data;					\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
	struct request *rq;						\
									\
	rq = deadline_from_pos(per_prio, data_dir,			\
			       per_prio->latest_pos[data_dir]);		\
	if (rq)								\
		__blk_mq_debugfs_rq_show(m, rq);			\
	return 0;							\
}

DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
#undef DEADLINE_DEBUGFS_DDIR_ATTRS

static int deadline_batching_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dd->batching);
	return 0;
}

static int deadline_starved_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dd->starved);
	return 0;
}

static int dd_async_depth_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dd->async_depth);
	return 0;
}

static int dd_queued_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;
	u32 rt, be, idle;

	spin_lock(&dd->lock);
	rt = dd_queued(dd, DD_RT_PRIO);
	be = dd_queued(dd, DD_BE_PRIO);
	idle = dd_queued(dd, DD_IDLE_PRIO);
	spin_unlock(&dd->lock);

	seq_printf(m, "%u %u %u\n", rt, be, idle);

	return 0;
}

/* Number of requests owned by the block driver for a given priority. */
static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
{
	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;

	lockdep_assert_held(&dd->lock);

	return stats->dispatched + stats->merged -
		atomic_read(&stats->completed);
}

static int dd_owned_by_driver_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;
	u32 rt, be, idle;

	spin_lock(&dd->lock);
	rt = dd_owned_by_driver(dd, DD_RT_PRIO);
	be = dd_owned_by_driver(dd, DD_BE_PRIO);
	idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
	spin_unlock(&dd->lock);

	seq_printf(m, "%u %u %u\n", rt, be, idle);

	return 0;
}

static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
	__acquires(&dd->lock)
{
	struct request_queue *q = m->private;
	struct deadline_data *dd = q->elevator->elevator_data;

	spin_lock(&dd->lock);
	return seq_list_start(&dd->dispatch, *pos);
}

static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct request_queue *q = m->private;
	struct deadline_data *dd = q->elevator->elevator_data;

	return seq_list_next(v, &dd->dispatch, pos);
}

static void deadline_dispatch_stop(struct seq_file *m, void *v)
	__releases(&dd->lock)
{
	struct request_queue *q = m->private;
	struct deadline_data *dd = q->elevator->elevator_data;

	spin_unlock(&dd->lock);
}

static const struct seq_operations deadline_dispatch_seq_ops = {
	.start	= deadline_dispatch_start,
	.next	= deadline_dispatch_next,
	.stop	= deadline_dispatch_stop,
	.show	= blk_mq_debugfs_rq_show,
};

#define DEADLINE_QUEUE_DDIR_ATTRS(name)					\
	{#name "_fifo_list", 0400,					\
	 .seq_ops = &deadline_##name##_fifo_seq_ops}
#define DEADLINE_NEXT_RQ_ATTR(name)					\
	{#name "_next_rq", 0400, deadline_##name##_next_rq_show}
static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
	DEADLINE_QUEUE_DDIR_ATTRS(read0),
	DEADLINE_QUEUE_DDIR_ATTRS(write0),
	DEADLINE_QUEUE_DDIR_ATTRS(read1),
	DEADLINE_QUEUE_DDIR_ATTRS(write1),
	DEADLINE_QUEUE_DDIR_ATTRS(read2),
	DEADLINE_QUEUE_DDIR_ATTRS(write2),
	DEADLINE_NEXT_RQ_ATTR(read0),
	DEADLINE_NEXT_RQ_ATTR(write0),
	DEADLINE_NEXT_RQ_ATTR(read1),
	DEADLINE_NEXT_RQ_ATTR(write1),
	DEADLINE_NEXT_RQ_ATTR(read2),
	DEADLINE_NEXT_RQ_ATTR(write2),
	{"batching", 0400, deadline_batching_show},
	{"starved", 0400, deadline_starved_show},
	{"async_depth", 0400, dd_async_depth_show},
	{"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
	{"owned_by_driver", 0400, dd_owned_by_driver_show},
	{"queued", 0400, dd_queued_show},
	{},
};
#undef DEADLINE_QUEUE_DDIR_ATTRS
#endif

static struct elevator_type mq_deadline = {
	.ops = {
		.depth_updated		= dd_depth_updated,
		.limit_depth		= dd_limit_depth,
		.insert_requests	= dd_insert_requests,
		.dispatch_request	= dd_dispatch_request,
		.prepare_request	= dd_prepare_request,
		.finish_request		= dd_finish_request,
		.next_request		= elv_rb_latter_request,
		.former_request		= elv_rb_former_request,
		.bio_merge		= dd_bio_merge,
		.request_merge		= dd_request_merge,
		.requests_merged	= dd_merged_requests,
		.request_merged		= dd_request_merged,
		.has_work		= dd_has_work,
		.init_sched		= dd_init_sched,
		.exit_sched		= dd_exit_sched,
	},

#ifdef CONFIG_BLK_DEBUG_FS
	.queue_debugfs_attrs = deadline_queue_debugfs_attrs,
#endif
	.elevator_attrs = deadline_attrs,
	.elevator_name = "mq-deadline",
	.elevator_alias = "deadline",
	.elevator_owner = THIS_MODULE,
};
MODULE_ALIAS("mq-deadline-iosched");

static int __init deadline_init(void)
{
	return elv_register(&mq_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&mq_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MQ deadline IO scheduler");