Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.18-rc7 2482 lines 59 kB view raw
1/* 2 * CFQ, or complete fairness queueing, disk scheduler. 3 * 4 * Based on ideas from a previously unfinished io 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 6 * 7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de> 8 */ 9#include <linux/module.h> 10#include <linux/blkdev.h> 11#include <linux/elevator.h> 12#include <linux/hash.h> 13#include <linux/rbtree.h> 14#include <linux/ioprio.h> 15 16/* 17 * tunables 18 */ 19static const int cfq_quantum = 4; /* max queue in one round of service */ 20static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 21static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 22static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 23static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ 24 25static const int cfq_slice_sync = HZ / 10; 26static int cfq_slice_async = HZ / 25; 27static const int cfq_slice_async_rq = 2; 28static int cfq_slice_idle = HZ / 125; 29 30#define CFQ_IDLE_GRACE (HZ / 10) 31#define CFQ_SLICE_SCALE (5) 32 33#define CFQ_KEY_ASYNC (0) 34 35static DEFINE_SPINLOCK(cfq_exit_lock); 36 37/* 38 * for the hash of cfqq inside the cfqd 39 */ 40#define CFQ_QHASH_SHIFT 6 41#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) 42#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) 43 44/* 45 * for the hash of crq inside the cfqq 46 */ 47#define CFQ_MHASH_SHIFT 6 48#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) 49#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) 50#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) 51#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 52#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) 53 54#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 55#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) 56 57#define RQ_DATA(rq) (rq)->elevator_private 58 59/* 60 * rb-tree defines 61 */ 62#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) 63#define rq_rb_key(rq) (rq)->sector 64 65static kmem_cache_t *crq_pool; 66static kmem_cache_t *cfq_pool; 67static kmem_cache_t *cfq_ioc_pool; 68 69static atomic_t ioc_count = ATOMIC_INIT(0); 70static struct completion *ioc_gone; 71 72#define CFQ_PRIO_LISTS IOPRIO_BE_NR 73#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 74#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) 75#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 76 77#define ASYNC (0) 78#define SYNC (1) 79 80#define cfq_cfqq_dispatched(cfqq) \ 81 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) 82 83#define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) 84 85#define cfq_cfqq_sync(cfqq) \ 86 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) 87 88#define sample_valid(samples) ((samples) > 80) 89 90/* 91 * Per block device queue structure 92 */ 93struct cfq_data { 94 request_queue_t *queue; 95 96 /* 97 * rr list of queues with requests and the count of them 98 */ 99 struct list_head rr_list[CFQ_PRIO_LISTS]; 100 struct list_head busy_rr; 101 struct list_head cur_rr; 102 struct list_head idle_rr; 103 unsigned int busy_queues; 104 105 /* 106 * non-ordered list of empty cfqq's 107 */ 108 struct list_head empty_list; 109 110 /* 111 * cfqq lookup hash 112 */ 113 struct hlist_head *cfq_hash; 114 115 /* 116 * global crq hash for all queues 117 */ 118 struct hlist_head *crq_hash; 119 120 mempool_t *crq_pool; 121 122 int rq_in_driver; 123 int hw_tag; 124 125 /* 126 * schedule slice state info 127 */ 128 /* 129 * idle window management 130 */ 131 struct timer_list idle_slice_timer; 132 struct work_struct unplug_work; 133 134 struct cfq_queue *active_queue; 135 struct cfq_io_context *active_cic; 136 int cur_prio, cur_end_prio; 137 unsigned int dispatch_slice; 138 139 struct timer_list idle_class_timer; 140 141 sector_t last_sector; 142 unsigned long last_end_request; 143 144 unsigned int rq_starved; 145 146 /* 147 * tunables, see top of file 148 */ 149 unsigned int cfq_quantum; 150 unsigned int cfq_queued; 151 unsigned int cfq_fifo_expire[2]; 152 unsigned int cfq_back_penalty; 153 unsigned int cfq_back_max; 154 unsigned int cfq_slice[2]; 155 unsigned int cfq_slice_async_rq; 156 unsigned int cfq_slice_idle; 157 158 struct list_head cic_list; 159}; 160 161/* 162 * Per process-grouping structure 163 */ 164struct cfq_queue { 165 /* reference count */ 166 atomic_t ref; 167 /* parent cfq_data */ 168 struct cfq_data *cfqd; 169 /* cfqq lookup hash */ 170 struct hlist_node cfq_hash; 171 /* hash key */ 172 unsigned int key; 173 /* on either rr or empty list of cfqd */ 174 struct list_head cfq_list; 175 /* sorted list of pending requests */ 176 struct rb_root sort_list; 177 /* if fifo isn't expired, next request to serve */ 178 struct cfq_rq *next_crq; 179 /* requests queued in sort_list */ 180 int queued[2]; 181 /* currently allocated requests */ 182 int allocated[2]; 183 /* fifo list of requests in sort_list */ 184 struct list_head fifo; 185 186 unsigned long slice_start; 187 unsigned long slice_end; 188 unsigned long slice_left; 189 unsigned long service_last; 190 191 /* number of requests that are on the dispatch list */ 192 int on_dispatch[2]; 193 194 /* io prio of this group */ 195 unsigned short ioprio, org_ioprio; 196 unsigned short ioprio_class, org_ioprio_class; 197 198 /* various state flags, see below */ 199 unsigned int flags; 200}; 201 202struct cfq_rq { 203 struct rb_node rb_node; 204 sector_t rb_key; 205 struct request *request; 206 struct hlist_node hash; 207 208 struct cfq_queue *cfq_queue; 209 struct cfq_io_context *io_context; 210 211 unsigned int crq_flags; 212}; 213 214enum cfqq_state_flags { 215 CFQ_CFQQ_FLAG_on_rr = 0, 216 CFQ_CFQQ_FLAG_wait_request, 217 CFQ_CFQQ_FLAG_must_alloc, 218 CFQ_CFQQ_FLAG_must_alloc_slice, 219 CFQ_CFQQ_FLAG_must_dispatch, 220 CFQ_CFQQ_FLAG_fifo_expire, 221 CFQ_CFQQ_FLAG_idle_window, 222 CFQ_CFQQ_FLAG_prio_changed, 223}; 224 225#define CFQ_CFQQ_FNS(name) \ 226static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 227{ \ 228 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 229} \ 230static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 231{ \ 232 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 233} \ 234static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 235{ \ 236 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 237} 238 239CFQ_CFQQ_FNS(on_rr); 240CFQ_CFQQ_FNS(wait_request); 241CFQ_CFQQ_FNS(must_alloc); 242CFQ_CFQQ_FNS(must_alloc_slice); 243CFQ_CFQQ_FNS(must_dispatch); 244CFQ_CFQQ_FNS(fifo_expire); 245CFQ_CFQQ_FNS(idle_window); 246CFQ_CFQQ_FNS(prio_changed); 247#undef CFQ_CFQQ_FNS 248 249enum cfq_rq_state_flags { 250 CFQ_CRQ_FLAG_is_sync = 0, 251}; 252 253#define CFQ_CRQ_FNS(name) \ 254static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ 255{ \ 256 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ 257} \ 258static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ 259{ \ 260 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ 261} \ 262static inline int cfq_crq_##name(const struct cfq_rq *crq) \ 263{ \ 264 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ 265} 266 267CFQ_CRQ_FNS(is_sync); 268#undef CFQ_CRQ_FNS 269 270static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 271static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 272static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 273 274/* 275 * lots of deadline iosched dupes, can be abstracted later... 276 */ 277static inline void cfq_del_crq_hash(struct cfq_rq *crq) 278{ 279 hlist_del_init(&crq->hash); 280} 281 282static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) 283{ 284 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); 285 286 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); 287} 288 289static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) 290{ 291 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; 292 struct hlist_node *entry, *next; 293 294 hlist_for_each_safe(entry, next, hash_list) { 295 struct cfq_rq *crq = list_entry_hash(entry); 296 struct request *__rq = crq->request; 297 298 if (!rq_mergeable(__rq)) { 299 cfq_del_crq_hash(crq); 300 continue; 301 } 302 303 if (rq_hash_key(__rq) == offset) 304 return __rq; 305 } 306 307 return NULL; 308} 309 310/* 311 * scheduler run of queue, if there are requests pending and no one in the 312 * driver that will restart queueing 313 */ 314static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 315{ 316 if (cfqd->busy_queues) 317 kblockd_schedule_work(&cfqd->unplug_work); 318} 319 320static int cfq_queue_empty(request_queue_t *q) 321{ 322 struct cfq_data *cfqd = q->elevator->elevator_data; 323 324 return !cfqd->busy_queues; 325} 326 327static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) 328{ 329 if (rw == READ || rw == WRITE_SYNC) 330 return task->pid; 331 332 return CFQ_KEY_ASYNC; 333} 334 335/* 336 * Lifted from AS - choose which of crq1 and crq2 that is best served now. 337 * We choose the request that is closest to the head right now. Distance 338 * behind the head is penalized and only allowed to a certain extent. 339 */ 340static struct cfq_rq * 341cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 342{ 343 sector_t last, s1, s2, d1 = 0, d2 = 0; 344 unsigned long back_max; 345#define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ 346#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 347 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 348 349 if (crq1 == NULL || crq1 == crq2) 350 return crq2; 351 if (crq2 == NULL) 352 return crq1; 353 354 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 355 return crq1; 356 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 357 return crq2; 358 359 s1 = crq1->request->sector; 360 s2 = crq2->request->sector; 361 362 last = cfqd->last_sector; 363 364 /* 365 * by definition, 1KiB is 2 sectors 366 */ 367 back_max = cfqd->cfq_back_max * 2; 368 369 /* 370 * Strict one way elevator _except_ in the case where we allow 371 * short backward seeks which are biased as twice the cost of a 372 * similar forward seek. 373 */ 374 if (s1 >= last) 375 d1 = s1 - last; 376 else if (s1 + back_max >= last) 377 d1 = (last - s1) * cfqd->cfq_back_penalty; 378 else 379 wrap |= CFQ_RQ1_WRAP; 380 381 if (s2 >= last) 382 d2 = s2 - last; 383 else if (s2 + back_max >= last) 384 d2 = (last - s2) * cfqd->cfq_back_penalty; 385 else 386 wrap |= CFQ_RQ2_WRAP; 387 388 /* Found required data */ 389 390 /* 391 * By doing switch() on the bit mask "wrap" we avoid having to 392 * check two variables for all permutations: --> faster! 393 */ 394 switch (wrap) { 395 case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ 396 if (d1 < d2) 397 return crq1; 398 else if (d2 < d1) 399 return crq2; 400 else { 401 if (s1 >= s2) 402 return crq1; 403 else 404 return crq2; 405 } 406 407 case CFQ_RQ2_WRAP: 408 return crq1; 409 case CFQ_RQ1_WRAP: 410 return crq2; 411 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ 412 default: 413 /* 414 * Since both rqs are wrapped, 415 * start with the one that's further behind head 416 * (--> only *one* back seek required), 417 * since back seek takes more time than forward. 418 */ 419 if (s1 <= s2) 420 return crq1; 421 else 422 return crq2; 423 } 424} 425 426/* 427 * would be nice to take fifo expire time into account as well 428 */ 429static struct cfq_rq * 430cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 431 struct cfq_rq *last) 432{ 433 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 434 struct rb_node *rbnext, *rbprev; 435 436 if (!(rbnext = rb_next(&last->rb_node))) { 437 rbnext = rb_first(&cfqq->sort_list); 438 if (rbnext == &last->rb_node) 439 rbnext = NULL; 440 } 441 442 rbprev = rb_prev(&last->rb_node); 443 444 if (rbprev) 445 crq_prev = rb_entry_crq(rbprev); 446 if (rbnext) 447 crq_next = rb_entry_crq(rbnext); 448 449 return cfq_choose_req(cfqd, crq_next, crq_prev); 450} 451 452static void cfq_update_next_crq(struct cfq_rq *crq) 453{ 454 struct cfq_queue *cfqq = crq->cfq_queue; 455 456 if (cfqq->next_crq == crq) 457 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); 458} 459 460static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 461{ 462 struct cfq_data *cfqd = cfqq->cfqd; 463 struct list_head *list, *entry; 464 465 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 466 467 list_del(&cfqq->cfq_list); 468 469 if (cfq_class_rt(cfqq)) 470 list = &cfqd->cur_rr; 471 else if (cfq_class_idle(cfqq)) 472 list = &cfqd->idle_rr; 473 else { 474 /* 475 * if cfqq has requests in flight, don't allow it to be 476 * found in cfq_set_active_queue before it has finished them. 477 * this is done to increase fairness between a process that 478 * has lots of io pending vs one that only generates one 479 * sporadically or synchronously 480 */ 481 if (cfq_cfqq_dispatched(cfqq)) 482 list = &cfqd->busy_rr; 483 else 484 list = &cfqd->rr_list[cfqq->ioprio]; 485 } 486 487 /* 488 * if queue was preempted, just add to front to be fair. busy_rr 489 * isn't sorted, but insert at the back for fairness. 490 */ 491 if (preempted || list == &cfqd->busy_rr) { 492 if (preempted) 493 list = list->prev; 494 495 list_add_tail(&cfqq->cfq_list, list); 496 return; 497 } 498 499 /* 500 * sort by when queue was last serviced 501 */ 502 entry = list; 503 while ((entry = entry->prev) != list) { 504 struct cfq_queue *__cfqq = list_entry_cfqq(entry); 505 506 if (!__cfqq->service_last) 507 break; 508 if (time_before(__cfqq->service_last, cfqq->service_last)) 509 break; 510 } 511 512 list_add(&cfqq->cfq_list, entry); 513} 514 515/* 516 * add to busy list of queues for service, trying to be fair in ordering 517 * the pending list according to last request service 518 */ 519static inline void 520cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 521{ 522 BUG_ON(cfq_cfqq_on_rr(cfqq)); 523 cfq_mark_cfqq_on_rr(cfqq); 524 cfqd->busy_queues++; 525 526 cfq_resort_rr_list(cfqq, 0); 527} 528 529static inline void 530cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 531{ 532 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 533 cfq_clear_cfqq_on_rr(cfqq); 534 list_move(&cfqq->cfq_list, &cfqd->empty_list); 535 536 BUG_ON(!cfqd->busy_queues); 537 cfqd->busy_queues--; 538} 539 540/* 541 * rb tree support functions 542 */ 543static inline void cfq_del_crq_rb(struct cfq_rq *crq) 544{ 545 struct cfq_queue *cfqq = crq->cfq_queue; 546 struct cfq_data *cfqd = cfqq->cfqd; 547 const int sync = cfq_crq_is_sync(crq); 548 549 BUG_ON(!cfqq->queued[sync]); 550 cfqq->queued[sync]--; 551 552 cfq_update_next_crq(crq); 553 554 rb_erase(&crq->rb_node, &cfqq->sort_list); 555 556 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 557 cfq_del_cfqq_rr(cfqd, cfqq); 558} 559 560static struct cfq_rq * 561__cfq_add_crq_rb(struct cfq_rq *crq) 562{ 563 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; 564 struct rb_node *parent = NULL; 565 struct cfq_rq *__crq; 566 567 while (*p) { 568 parent = *p; 569 __crq = rb_entry_crq(parent); 570 571 if (crq->rb_key < __crq->rb_key) 572 p = &(*p)->rb_left; 573 else if (crq->rb_key > __crq->rb_key) 574 p = &(*p)->rb_right; 575 else 576 return __crq; 577 } 578 579 rb_link_node(&crq->rb_node, parent, p); 580 return NULL; 581} 582 583static void cfq_add_crq_rb(struct cfq_rq *crq) 584{ 585 struct cfq_queue *cfqq = crq->cfq_queue; 586 struct cfq_data *cfqd = cfqq->cfqd; 587 struct request *rq = crq->request; 588 struct cfq_rq *__alias; 589 590 crq->rb_key = rq_rb_key(rq); 591 cfqq->queued[cfq_crq_is_sync(crq)]++; 592 593 /* 594 * looks a little odd, but the first insert might return an alias. 595 * if that happens, put the alias on the dispatch list 596 */ 597 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 598 cfq_dispatch_insert(cfqd->queue, __alias); 599 600 rb_insert_color(&crq->rb_node, &cfqq->sort_list); 601 602 if (!cfq_cfqq_on_rr(cfqq)) 603 cfq_add_cfqq_rr(cfqd, cfqq); 604 605 /* 606 * check if this request is a better next-serve candidate 607 */ 608 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 609} 610 611static inline void 612cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 613{ 614 rb_erase(&crq->rb_node, &cfqq->sort_list); 615 cfqq->queued[cfq_crq_is_sync(crq)]--; 616 617 cfq_add_crq_rb(crq); 618} 619 620static struct request * 621cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 622{ 623 struct task_struct *tsk = current; 624 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); 625 struct cfq_queue *cfqq; 626 struct rb_node *n; 627 sector_t sector; 628 629 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); 630 if (!cfqq) 631 goto out; 632 633 sector = bio->bi_sector + bio_sectors(bio); 634 n = cfqq->sort_list.rb_node; 635 while (n) { 636 struct cfq_rq *crq = rb_entry_crq(n); 637 638 if (sector < crq->rb_key) 639 n = n->rb_left; 640 else if (sector > crq->rb_key) 641 n = n->rb_right; 642 else 643 return crq->request; 644 } 645 646out: 647 return NULL; 648} 649 650static void cfq_activate_request(request_queue_t *q, struct request *rq) 651{ 652 struct cfq_data *cfqd = q->elevator->elevator_data; 653 654 cfqd->rq_in_driver++; 655 656 /* 657 * If the depth is larger 1, it really could be queueing. But lets 658 * make the mark a little higher - idling could still be good for 659 * low queueing, and a low queueing number could also just indicate 660 * a SCSI mid layer like behaviour where limit+1 is often seen. 661 */ 662 if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) 663 cfqd->hw_tag = 1; 664} 665 666static void cfq_deactivate_request(request_queue_t *q, struct request *rq) 667{ 668 struct cfq_data *cfqd = q->elevator->elevator_data; 669 670 WARN_ON(!cfqd->rq_in_driver); 671 cfqd->rq_in_driver--; 672} 673 674static void cfq_remove_request(struct request *rq) 675{ 676 struct cfq_rq *crq = RQ_DATA(rq); 677 678 list_del_init(&rq->queuelist); 679 cfq_del_crq_rb(crq); 680 cfq_del_crq_hash(crq); 681} 682 683static int 684cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) 685{ 686 struct cfq_data *cfqd = q->elevator->elevator_data; 687 struct request *__rq; 688 int ret; 689 690 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); 691 if (__rq && elv_rq_merge_ok(__rq, bio)) { 692 ret = ELEVATOR_BACK_MERGE; 693 goto out; 694 } 695 696 __rq = cfq_find_rq_fmerge(cfqd, bio); 697 if (__rq && elv_rq_merge_ok(__rq, bio)) { 698 ret = ELEVATOR_FRONT_MERGE; 699 goto out; 700 } 701 702 return ELEVATOR_NO_MERGE; 703out: 704 *req = __rq; 705 return ret; 706} 707 708static void cfq_merged_request(request_queue_t *q, struct request *req) 709{ 710 struct cfq_data *cfqd = q->elevator->elevator_data; 711 struct cfq_rq *crq = RQ_DATA(req); 712 713 cfq_del_crq_hash(crq); 714 cfq_add_crq_hash(cfqd, crq); 715 716 if (rq_rb_key(req) != crq->rb_key) { 717 struct cfq_queue *cfqq = crq->cfq_queue; 718 719 cfq_update_next_crq(crq); 720 cfq_reposition_crq_rb(cfqq, crq); 721 } 722} 723 724static void 725cfq_merged_requests(request_queue_t *q, struct request *rq, 726 struct request *next) 727{ 728 cfq_merged_request(q, rq); 729 730 /* 731 * reposition in fifo if next is older than rq 732 */ 733 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 734 time_before(next->start_time, rq->start_time)) 735 list_move(&rq->queuelist, &next->queuelist); 736 737 cfq_remove_request(next); 738} 739 740static inline void 741__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 742{ 743 if (cfqq) { 744 /* 745 * stop potential idle class queues waiting service 746 */ 747 del_timer(&cfqd->idle_class_timer); 748 749 cfqq->slice_start = jiffies; 750 cfqq->slice_end = 0; 751 cfqq->slice_left = 0; 752 cfq_clear_cfqq_must_alloc_slice(cfqq); 753 cfq_clear_cfqq_fifo_expire(cfqq); 754 } 755 756 cfqd->active_queue = cfqq; 757} 758 759/* 760 * current cfqq expired its slice (or was too idle), select new one 761 */ 762static void 763__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 764 int preempted) 765{ 766 unsigned long now = jiffies; 767 768 if (cfq_cfqq_wait_request(cfqq)) 769 del_timer(&cfqd->idle_slice_timer); 770 771 if (!preempted && !cfq_cfqq_dispatched(cfqq)) { 772 cfqq->service_last = now; 773 cfq_schedule_dispatch(cfqd); 774 } 775 776 cfq_clear_cfqq_must_dispatch(cfqq); 777 cfq_clear_cfqq_wait_request(cfqq); 778 779 /* 780 * store what was left of this slice, if the queue idled out 781 * or was preempted 782 */ 783 if (time_after(cfqq->slice_end, now)) 784 cfqq->slice_left = cfqq->slice_end - now; 785 else 786 cfqq->slice_left = 0; 787 788 if (cfq_cfqq_on_rr(cfqq)) 789 cfq_resort_rr_list(cfqq, preempted); 790 791 if (cfqq == cfqd->active_queue) 792 cfqd->active_queue = NULL; 793 794 if (cfqd->active_cic) { 795 put_io_context(cfqd->active_cic->ioc); 796 cfqd->active_cic = NULL; 797 } 798 799 cfqd->dispatch_slice = 0; 800} 801 802static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) 803{ 804 struct cfq_queue *cfqq = cfqd->active_queue; 805 806 if (cfqq) 807 __cfq_slice_expired(cfqd, cfqq, preempted); 808} 809 810/* 811 * 0 812 * 0,1 813 * 0,1,2 814 * 0,1,2,3 815 * 0,1,2,3,4 816 * 0,1,2,3,4,5 817 * 0,1,2,3,4,5,6 818 * 0,1,2,3,4,5,6,7 819 */ 820static int cfq_get_next_prio_level(struct cfq_data *cfqd) 821{ 822 int prio, wrap; 823 824 prio = -1; 825 wrap = 0; 826 do { 827 int p; 828 829 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { 830 if (!list_empty(&cfqd->rr_list[p])) { 831 prio = p; 832 break; 833 } 834 } 835 836 if (prio != -1) 837 break; 838 cfqd->cur_prio = 0; 839 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 840 cfqd->cur_end_prio = 0; 841 if (wrap) 842 break; 843 wrap = 1; 844 } 845 } while (1); 846 847 if (unlikely(prio == -1)) 848 return -1; 849 850 BUG_ON(prio >= CFQ_PRIO_LISTS); 851 852 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); 853 854 cfqd->cur_prio = prio + 1; 855 if (cfqd->cur_prio > cfqd->cur_end_prio) { 856 cfqd->cur_end_prio = cfqd->cur_prio; 857 cfqd->cur_prio = 0; 858 } 859 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 860 cfqd->cur_prio = 0; 861 cfqd->cur_end_prio = 0; 862 } 863 864 return prio; 865} 866 867static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 868{ 869 struct cfq_queue *cfqq = NULL; 870 871 /* 872 * if current list is non-empty, grab first entry. if it is empty, 873 * get next prio level and grab first entry then if any are spliced 874 */ 875 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) 876 cfqq = list_entry_cfqq(cfqd->cur_rr.next); 877 878 /* 879 * If no new queues are available, check if the busy list has some 880 * before falling back to idle io. 881 */ 882 if (!cfqq && !list_empty(&cfqd->busy_rr)) 883 cfqq = list_entry_cfqq(cfqd->busy_rr.next); 884 885 /* 886 * if we have idle queues and no rt or be queues had pending 887 * requests, either allow immediate service if the grace period 888 * has passed or arm the idle grace timer 889 */ 890 if (!cfqq && !list_empty(&cfqd->idle_rr)) { 891 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; 892 893 if (time_after_eq(jiffies, end)) 894 cfqq = list_entry_cfqq(cfqd->idle_rr.next); 895 else 896 mod_timer(&cfqd->idle_class_timer, end); 897 } 898 899 __cfq_set_active_queue(cfqd, cfqq); 900 return cfqq; 901} 902 903#define CIC_SEEKY(cic) ((cic)->seek_mean > (128 * 1024)) 904 905static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 906 907{ 908 struct cfq_io_context *cic; 909 unsigned long sl; 910 911 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 912 WARN_ON(cfqq != cfqd->active_queue); 913 914 /* 915 * idle is disabled, either manually or by past process history 916 */ 917 if (!cfqd->cfq_slice_idle) 918 return 0; 919 if (!cfq_cfqq_idle_window(cfqq)) 920 return 0; 921 /* 922 * task has exited, don't wait 923 */ 924 cic = cfqd->active_cic; 925 if (!cic || !cic->ioc->task) 926 return 0; 927 928 cfq_mark_cfqq_must_dispatch(cfqq); 929 cfq_mark_cfqq_wait_request(cfqq); 930 931 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); 932 933 /* 934 * we don't want to idle for seeks, but we do want to allow 935 * fair distribution of slice time for a process doing back-to-back 936 * seeks. so allow a little bit of time for him to submit a new rq 937 */ 938 if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) 939 sl = min(sl, msecs_to_jiffies(2)); 940 941 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 942 return 1; 943} 944 945static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) 946{ 947 struct cfq_data *cfqd = q->elevator->elevator_data; 948 struct cfq_queue *cfqq = crq->cfq_queue; 949 struct request *rq; 950 951 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 952 cfq_remove_request(crq->request); 953 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 954 elv_dispatch_sort(q, crq->request); 955 956 rq = list_entry(q->queue_head.prev, struct request, queuelist); 957 cfqd->last_sector = rq->sector + rq->nr_sectors; 958} 959 960/* 961 * return expired entry, or NULL to just start from scratch in rbtree 962 */ 963static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) 964{ 965 struct cfq_data *cfqd = cfqq->cfqd; 966 struct request *rq; 967 struct cfq_rq *crq; 968 969 if (cfq_cfqq_fifo_expire(cfqq)) 970 return NULL; 971 972 if (!list_empty(&cfqq->fifo)) { 973 int fifo = cfq_cfqq_class_sync(cfqq); 974 975 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); 976 rq = crq->request; 977 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { 978 cfq_mark_cfqq_fifo_expire(cfqq); 979 return crq; 980 } 981 } 982 983 return NULL; 984} 985 986/* 987 * Scale schedule slice based on io priority. Use the sync time slice only 988 * if a queue is marked sync and has sync io queued. A sync queue with async 989 * io only, should not get full sync slice length. 990 */ 991static inline int 992cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 993{ 994 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; 995 996 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 997 998 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); 999} 1000 1001static inline void 1002cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1003{ 1004 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 1005} 1006 1007static inline int 1008cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1009{ 1010 const int base_rq = cfqd->cfq_slice_async_rq; 1011 1012 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 1013 1014 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 1015} 1016 1017/* 1018 * get next queue for service 1019 */ 1020static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 1021{ 1022 unsigned long now = jiffies; 1023 struct cfq_queue *cfqq; 1024 1025 cfqq = cfqd->active_queue; 1026 if (!cfqq) 1027 goto new_queue; 1028 1029 /* 1030 * slice has expired 1031 */ 1032 if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) 1033 goto expire; 1034 1035 /* 1036 * if queue has requests, dispatch one. if not, check if 1037 * enough slice is left to wait for one 1038 */ 1039 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 1040 goto keep_queue; 1041 else if (cfq_cfqq_dispatched(cfqq)) { 1042 cfqq = NULL; 1043 goto keep_queue; 1044 } else if (cfq_cfqq_class_sync(cfqq)) { 1045 if (cfq_arm_slice_timer(cfqd, cfqq)) 1046 return NULL; 1047 } 1048 1049expire: 1050 cfq_slice_expired(cfqd, 0); 1051new_queue: 1052 cfqq = cfq_set_active_queue(cfqd); 1053keep_queue: 1054 return cfqq; 1055} 1056 1057static int 1058__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1059 int max_dispatch) 1060{ 1061 int dispatched = 0; 1062 1063 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 1064 1065 do { 1066 struct cfq_rq *crq; 1067 1068 /* 1069 * follow expired path, else get first next available 1070 */ 1071 if ((crq = cfq_check_fifo(cfqq)) == NULL) 1072 crq = cfqq->next_crq; 1073 1074 /* 1075 * finally, insert request into driver dispatch list 1076 */ 1077 cfq_dispatch_insert(cfqd->queue, crq); 1078 1079 cfqd->dispatch_slice++; 1080 dispatched++; 1081 1082 if (!cfqd->active_cic) { 1083 atomic_inc(&crq->io_context->ioc->refcount); 1084 cfqd->active_cic = crq->io_context; 1085 } 1086 1087 if (RB_EMPTY_ROOT(&cfqq->sort_list)) 1088 break; 1089 1090 } while (dispatched < max_dispatch); 1091 1092 /* 1093 * if slice end isn't set yet, set it. 1094 */ 1095 if (!cfqq->slice_end) 1096 cfq_set_prio_slice(cfqd, cfqq); 1097 1098 /* 1099 * expire an async queue immediately if it has used up its slice. idle 1100 * queue always expire after 1 dispatch round. 1101 */ 1102 if ((!cfq_cfqq_sync(cfqq) && 1103 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1104 cfq_class_idle(cfqq) || 1105 !cfq_cfqq_idle_window(cfqq)) 1106 cfq_slice_expired(cfqd, 0); 1107 1108 return dispatched; 1109} 1110 1111static int 1112cfq_forced_dispatch_cfqqs(struct list_head *list) 1113{ 1114 struct cfq_queue *cfqq, *next; 1115 struct cfq_rq *crq; 1116 int dispatched; 1117 1118 dispatched = 0; 1119 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 1120 while ((crq = cfqq->next_crq)) { 1121 cfq_dispatch_insert(cfqq->cfqd->queue, crq); 1122 dispatched++; 1123 } 1124 BUG_ON(!list_empty(&cfqq->fifo)); 1125 } 1126 1127 return dispatched; 1128} 1129 1130static int 1131cfq_forced_dispatch(struct cfq_data *cfqd) 1132{ 1133 int i, dispatched = 0; 1134 1135 for (i = 0; i < CFQ_PRIO_LISTS; i++) 1136 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); 1137 1138 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); 1139 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); 1140 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); 1141 1142 cfq_slice_expired(cfqd, 0); 1143 1144 BUG_ON(cfqd->busy_queues); 1145 1146 return dispatched; 1147} 1148 1149static int 1150cfq_dispatch_requests(request_queue_t *q, int force) 1151{ 1152 struct cfq_data *cfqd = q->elevator->elevator_data; 1153 struct cfq_queue *cfqq, *prev_cfqq; 1154 int dispatched; 1155 1156 if (!cfqd->busy_queues) 1157 return 0; 1158 1159 if (unlikely(force)) 1160 return cfq_forced_dispatch(cfqd); 1161 1162 dispatched = 0; 1163 prev_cfqq = NULL; 1164 while ((cfqq = cfq_select_queue(cfqd)) != NULL) { 1165 int max_dispatch; 1166 1167 /* 1168 * Don't repeat dispatch from the previous queue. 1169 */ 1170 if (prev_cfqq == cfqq) 1171 break; 1172 1173 cfq_clear_cfqq_must_dispatch(cfqq); 1174 cfq_clear_cfqq_wait_request(cfqq); 1175 del_timer(&cfqd->idle_slice_timer); 1176 1177 max_dispatch = cfqd->cfq_quantum; 1178 if (cfq_class_idle(cfqq)) 1179 max_dispatch = 1; 1180 1181 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1182 1183 /* 1184 * If the dispatch cfqq has idling enabled and is still 1185 * the active queue, break out. 1186 */ 1187 if (cfq_cfqq_idle_window(cfqq) && cfqd->active_queue) 1188 break; 1189 1190 prev_cfqq = cfqq; 1191 } 1192 1193 return dispatched; 1194} 1195 1196/* 1197 * task holds one reference to the queue, dropped when task exits. each crq 1198 * in-flight on this queue also holds a reference, dropped when crq is freed. 1199 * 1200 * queue lock must be held here. 1201 */ 1202static void cfq_put_queue(struct cfq_queue *cfqq) 1203{ 1204 struct cfq_data *cfqd = cfqq->cfqd; 1205 1206 BUG_ON(atomic_read(&cfqq->ref) <= 0); 1207 1208 if (!atomic_dec_and_test(&cfqq->ref)) 1209 return; 1210 1211 BUG_ON(rb_first(&cfqq->sort_list)); 1212 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1213 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1214 1215 if (unlikely(cfqd->active_queue == cfqq)) 1216 __cfq_slice_expired(cfqd, cfqq, 0); 1217 1218 /* 1219 * it's on the empty list and still hashed 1220 */ 1221 list_del(&cfqq->cfq_list); 1222 hlist_del(&cfqq->cfq_hash); 1223 kmem_cache_free(cfq_pool, cfqq); 1224} 1225 1226static inline struct cfq_queue * 1227__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, 1228 const int hashval) 1229{ 1230 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; 1231 struct hlist_node *entry; 1232 struct cfq_queue *__cfqq; 1233 1234 hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) { 1235 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); 1236 1237 if (__cfqq->key == key && (__p == prio || !prio)) 1238 return __cfqq; 1239 } 1240 1241 return NULL; 1242} 1243 1244static struct cfq_queue * 1245cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) 1246{ 1247 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); 1248} 1249 1250static void cfq_free_io_context(struct io_context *ioc) 1251{ 1252 struct cfq_io_context *__cic; 1253 struct rb_node *n; 1254 int freed = 0; 1255 1256 while ((n = rb_first(&ioc->cic_root)) != NULL) { 1257 __cic = rb_entry(n, struct cfq_io_context, rb_node); 1258 rb_erase(&__cic->rb_node, &ioc->cic_root); 1259 kmem_cache_free(cfq_ioc_pool, __cic); 1260 freed++; 1261 } 1262 1263 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) 1264 complete(ioc_gone); 1265} 1266 1267static void cfq_trim(struct io_context *ioc) 1268{ 1269 ioc->set_ioprio = NULL; 1270 cfq_free_io_context(ioc); 1271} 1272 1273/* 1274 * Called with interrupts disabled 1275 */ 1276static void cfq_exit_single_io_context(struct cfq_io_context *cic) 1277{ 1278 struct cfq_data *cfqd = cic->key; 1279 request_queue_t *q; 1280 1281 if (!cfqd) 1282 return; 1283 1284 q = cfqd->queue; 1285 1286 WARN_ON(!irqs_disabled()); 1287 1288 spin_lock(q->queue_lock); 1289 1290 if (cic->cfqq[ASYNC]) { 1291 if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) 1292 __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0); 1293 cfq_put_queue(cic->cfqq[ASYNC]); 1294 cic->cfqq[ASYNC] = NULL; 1295 } 1296 1297 if (cic->cfqq[SYNC]) { 1298 if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) 1299 __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0); 1300 cfq_put_queue(cic->cfqq[SYNC]); 1301 cic->cfqq[SYNC] = NULL; 1302 } 1303 1304 cic->key = NULL; 1305 list_del_init(&cic->queue_list); 1306 spin_unlock(q->queue_lock); 1307} 1308 1309static void cfq_exit_io_context(struct io_context *ioc) 1310{ 1311 struct cfq_io_context *__cic; 1312 unsigned long flags; 1313 struct rb_node *n; 1314 1315 /* 1316 * put the reference this task is holding to the various queues 1317 */ 1318 spin_lock_irqsave(&cfq_exit_lock, flags); 1319 1320 n = rb_first(&ioc->cic_root); 1321 while (n != NULL) { 1322 __cic = rb_entry(n, struct cfq_io_context, rb_node); 1323 1324 cfq_exit_single_io_context(__cic); 1325 n = rb_next(n); 1326 } 1327 1328 spin_unlock_irqrestore(&cfq_exit_lock, flags); 1329} 1330 1331static struct cfq_io_context * 1332cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1333{ 1334 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1335 1336 if (cic) { 1337 memset(cic, 0, sizeof(*cic)); 1338 cic->last_end_request = jiffies; 1339 INIT_LIST_HEAD(&cic->queue_list); 1340 cic->dtor = cfq_free_io_context; 1341 cic->exit = cfq_exit_io_context; 1342 atomic_inc(&ioc_count); 1343 } 1344 1345 return cic; 1346} 1347 1348static void cfq_init_prio_data(struct cfq_queue *cfqq) 1349{ 1350 struct task_struct *tsk = current; 1351 int ioprio_class; 1352 1353 if (!cfq_cfqq_prio_changed(cfqq)) 1354 return; 1355 1356 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); 1357 switch (ioprio_class) { 1358 default: 1359 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 1360 case IOPRIO_CLASS_NONE: 1361 /* 1362 * no prio set, place us in the middle of the BE classes 1363 */ 1364 cfqq->ioprio = task_nice_ioprio(tsk); 1365 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1366 break; 1367 case IOPRIO_CLASS_RT: 1368 cfqq->ioprio = task_ioprio(tsk); 1369 cfqq->ioprio_class = IOPRIO_CLASS_RT; 1370 break; 1371 case IOPRIO_CLASS_BE: 1372 cfqq->ioprio = task_ioprio(tsk); 1373 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1374 break; 1375 case IOPRIO_CLASS_IDLE: 1376 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 1377 cfqq->ioprio = 7; 1378 cfq_clear_cfqq_idle_window(cfqq); 1379 break; 1380 } 1381 1382 /* 1383 * keep track of original prio settings in case we have to temporarily 1384 * elevate the priority of this queue 1385 */ 1386 cfqq->org_ioprio = cfqq->ioprio; 1387 cfqq->org_ioprio_class = cfqq->ioprio_class; 1388 1389 if (cfq_cfqq_on_rr(cfqq)) 1390 cfq_resort_rr_list(cfqq, 0); 1391 1392 cfq_clear_cfqq_prio_changed(cfqq); 1393} 1394 1395static inline void changed_ioprio(struct cfq_io_context *cic) 1396{ 1397 struct cfq_data *cfqd = cic->key; 1398 struct cfq_queue *cfqq; 1399 1400 if (unlikely(!cfqd)) 1401 return; 1402 1403 spin_lock(cfqd->queue->queue_lock); 1404 1405 cfqq = cic->cfqq[ASYNC]; 1406 if (cfqq) { 1407 struct cfq_queue *new_cfqq; 1408 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, cic->ioc->task, 1409 GFP_ATOMIC); 1410 if (new_cfqq) { 1411 cic->cfqq[ASYNC] = new_cfqq; 1412 cfq_put_queue(cfqq); 1413 } 1414 } 1415 1416 cfqq = cic->cfqq[SYNC]; 1417 if (cfqq) 1418 cfq_mark_cfqq_prio_changed(cfqq); 1419 1420 spin_unlock(cfqd->queue->queue_lock); 1421} 1422 1423/* 1424 * callback from sys_ioprio_set, irqs are disabled 1425 */ 1426static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) 1427{ 1428 struct cfq_io_context *cic; 1429 struct rb_node *n; 1430 1431 spin_lock(&cfq_exit_lock); 1432 1433 n = rb_first(&ioc->cic_root); 1434 while (n != NULL) { 1435 cic = rb_entry(n, struct cfq_io_context, rb_node); 1436 1437 changed_ioprio(cic); 1438 n = rb_next(n); 1439 } 1440 1441 spin_unlock(&cfq_exit_lock); 1442 1443 return 0; 1444} 1445 1446static struct cfq_queue * 1447cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, 1448 gfp_t gfp_mask) 1449{ 1450 const int hashval = hash_long(key, CFQ_QHASH_SHIFT); 1451 struct cfq_queue *cfqq, *new_cfqq = NULL; 1452 unsigned short ioprio; 1453 1454retry: 1455 ioprio = tsk->ioprio; 1456 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); 1457 1458 if (!cfqq) { 1459 if (new_cfqq) { 1460 cfqq = new_cfqq; 1461 new_cfqq = NULL; 1462 } else if (gfp_mask & __GFP_WAIT) { 1463 spin_unlock_irq(cfqd->queue->queue_lock); 1464 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1465 spin_lock_irq(cfqd->queue->queue_lock); 1466 goto retry; 1467 } else { 1468 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1469 if (!cfqq) 1470 goto out; 1471 } 1472 1473 memset(cfqq, 0, sizeof(*cfqq)); 1474 1475 INIT_HLIST_NODE(&cfqq->cfq_hash); 1476 INIT_LIST_HEAD(&cfqq->cfq_list); 1477 INIT_LIST_HEAD(&cfqq->fifo); 1478 1479 cfqq->key = key; 1480 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1481 atomic_set(&cfqq->ref, 0); 1482 cfqq->cfqd = cfqd; 1483 cfqq->service_last = 0; 1484 /* 1485 * set ->slice_left to allow preemption for a new process 1486 */ 1487 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1488 cfq_mark_cfqq_idle_window(cfqq); 1489 cfq_mark_cfqq_prio_changed(cfqq); 1490 cfq_init_prio_data(cfqq); 1491 } 1492 1493 if (new_cfqq) 1494 kmem_cache_free(cfq_pool, new_cfqq); 1495 1496 atomic_inc(&cfqq->ref); 1497out: 1498 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); 1499 return cfqq; 1500} 1501 1502static void 1503cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) 1504{ 1505 spin_lock(&cfq_exit_lock); 1506 rb_erase(&cic->rb_node, &ioc->cic_root); 1507 list_del_init(&cic->queue_list); 1508 spin_unlock(&cfq_exit_lock); 1509 kmem_cache_free(cfq_ioc_pool, cic); 1510 atomic_dec(&ioc_count); 1511} 1512 1513static struct cfq_io_context * 1514cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) 1515{ 1516 struct rb_node *n; 1517 struct cfq_io_context *cic; 1518 void *k, *key = cfqd; 1519 1520restart: 1521 n = ioc->cic_root.rb_node; 1522 while (n) { 1523 cic = rb_entry(n, struct cfq_io_context, rb_node); 1524 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1525 k = cic->key; 1526 if (unlikely(!k)) { 1527 cfq_drop_dead_cic(ioc, cic); 1528 goto restart; 1529 } 1530 1531 if (key < k) 1532 n = n->rb_left; 1533 else if (key > k) 1534 n = n->rb_right; 1535 else 1536 return cic; 1537 } 1538 1539 return NULL; 1540} 1541 1542static inline void 1543cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, 1544 struct cfq_io_context *cic) 1545{ 1546 struct rb_node **p; 1547 struct rb_node *parent; 1548 struct cfq_io_context *__cic; 1549 void *k; 1550 1551 cic->ioc = ioc; 1552 cic->key = cfqd; 1553 1554 ioc->set_ioprio = cfq_ioc_set_ioprio; 1555restart: 1556 parent = NULL; 1557 p = &ioc->cic_root.rb_node; 1558 while (*p) { 1559 parent = *p; 1560 __cic = rb_entry(parent, struct cfq_io_context, rb_node); 1561 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1562 k = __cic->key; 1563 if (unlikely(!k)) { 1564 cfq_drop_dead_cic(ioc, __cic); 1565 goto restart; 1566 } 1567 1568 if (cic->key < k) 1569 p = &(*p)->rb_left; 1570 else if (cic->key > k) 1571 p = &(*p)->rb_right; 1572 else 1573 BUG(); 1574 } 1575 1576 spin_lock(&cfq_exit_lock); 1577 rb_link_node(&cic->rb_node, parent, p); 1578 rb_insert_color(&cic->rb_node, &ioc->cic_root); 1579 list_add(&cic->queue_list, &cfqd->cic_list); 1580 spin_unlock(&cfq_exit_lock); 1581} 1582 1583/* 1584 * Setup general io context and cfq io context. There can be several cfq 1585 * io contexts per general io context, if this process is doing io to more 1586 * than one device managed by cfq. 1587 */ 1588static struct cfq_io_context * 1589cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1590{ 1591 struct io_context *ioc = NULL; 1592 struct cfq_io_context *cic; 1593 1594 might_sleep_if(gfp_mask & __GFP_WAIT); 1595 1596 ioc = get_io_context(gfp_mask); 1597 if (!ioc) 1598 return NULL; 1599 1600 cic = cfq_cic_rb_lookup(cfqd, ioc); 1601 if (cic) 1602 goto out; 1603 1604 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1605 if (cic == NULL) 1606 goto err; 1607 1608 cfq_cic_link(cfqd, ioc, cic); 1609out: 1610 return cic; 1611err: 1612 put_io_context(ioc); 1613 return NULL; 1614} 1615 1616static void 1617cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 1618{ 1619 unsigned long elapsed, ttime; 1620 1621 /* 1622 * if this context already has stuff queued, thinktime is from 1623 * last queue not last end 1624 */ 1625#if 0 1626 if (time_after(cic->last_end_request, cic->last_queue)) 1627 elapsed = jiffies - cic->last_end_request; 1628 else 1629 elapsed = jiffies - cic->last_queue; 1630#else 1631 elapsed = jiffies - cic->last_end_request; 1632#endif 1633 1634 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 1635 1636 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 1637 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 1638 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1639} 1640 1641static void 1642cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, 1643 struct cfq_rq *crq) 1644{ 1645 sector_t sdist; 1646 u64 total; 1647 1648 if (cic->last_request_pos < crq->request->sector) 1649 sdist = crq->request->sector - cic->last_request_pos; 1650 else 1651 sdist = cic->last_request_pos - crq->request->sector; 1652 1653 /* 1654 * Don't allow the seek distance to get too large from the 1655 * odd fragment, pagein, etc 1656 */ 1657 if (cic->seek_samples <= 60) /* second&third seek */ 1658 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); 1659 else 1660 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); 1661 1662 cic->seek_samples = (7*cic->seek_samples + 256) / 8; 1663 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; 1664 total = cic->seek_total + (cic->seek_samples/2); 1665 do_div(total, cic->seek_samples); 1666 cic->seek_mean = (sector_t)total; 1667} 1668 1669/* 1670 * Disable idle window if the process thinks too long or seeks so much that 1671 * it doesn't matter 1672 */ 1673static void 1674cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1675 struct cfq_io_context *cic) 1676{ 1677 int enable_idle = cfq_cfqq_idle_window(cfqq); 1678 1679 if (!cic->ioc->task || !cfqd->cfq_slice_idle || 1680 (cfqd->hw_tag && CIC_SEEKY(cic))) 1681 enable_idle = 0; 1682 else if (sample_valid(cic->ttime_samples)) { 1683 if (cic->ttime_mean > cfqd->cfq_slice_idle) 1684 enable_idle = 0; 1685 else 1686 enable_idle = 1; 1687 } 1688 1689 if (enable_idle) 1690 cfq_mark_cfqq_idle_window(cfqq); 1691 else 1692 cfq_clear_cfqq_idle_window(cfqq); 1693} 1694 1695 1696/* 1697 * Check if new_cfqq should preempt the currently active queue. Return 0 for 1698 * no or if we aren't sure, a 1 will cause a preempt. 1699 */ 1700static int 1701cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1702 struct cfq_rq *crq) 1703{ 1704 struct cfq_queue *cfqq = cfqd->active_queue; 1705 1706 if (cfq_class_idle(new_cfqq)) 1707 return 0; 1708 1709 if (!cfqq) 1710 return 0; 1711 1712 if (cfq_class_idle(cfqq)) 1713 return 1; 1714 if (!cfq_cfqq_wait_request(new_cfqq)) 1715 return 0; 1716 /* 1717 * if it doesn't have slice left, forget it 1718 */ 1719 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1720 return 0; 1721 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) 1722 return 1; 1723 1724 return 0; 1725} 1726 1727/* 1728 * cfqq preempts the active queue. if we allowed preempt with no slice left, 1729 * let it have half of its nominal slice. 1730 */ 1731static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1732{ 1733 struct cfq_queue *__cfqq, *next; 1734 1735 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) 1736 cfq_resort_rr_list(__cfqq, 1); 1737 1738 if (!cfqq->slice_left) 1739 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1740 1741 cfqq->slice_end = cfqq->slice_left + jiffies; 1742 cfq_slice_expired(cfqd, 1); 1743 __cfq_set_active_queue(cfqd, cfqq); 1744} 1745 1746/* 1747 * should really be a ll_rw_blk.c helper 1748 */ 1749static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1750{ 1751 request_queue_t *q = cfqd->queue; 1752 1753 if (!blk_queue_plugged(q)) 1754 q->request_fn(q); 1755 else 1756 __generic_unplug_device(q); 1757} 1758 1759/* 1760 * Called when a new fs request (crq) is added (to cfqq). Check if there's 1761 * something we should do about it 1762 */ 1763static void 1764cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1765 struct cfq_rq *crq) 1766{ 1767 struct cfq_io_context *cic = crq->io_context; 1768 1769 /* 1770 * we never wait for an async request and we don't allow preemption 1771 * of an async request. so just return early 1772 */ 1773 if (!cfq_crq_is_sync(crq)) { 1774 /* 1775 * sync process issued an async request, if it's waiting 1776 * then expire it and kick rq handling. 1777 */ 1778 if (cic == cfqd->active_cic && 1779 del_timer(&cfqd->idle_slice_timer)) { 1780 cfq_slice_expired(cfqd, 0); 1781 cfq_start_queueing(cfqd, cfqq); 1782 } 1783 return; 1784 } 1785 1786 cfq_update_io_thinktime(cfqd, cic); 1787 cfq_update_io_seektime(cfqd, cic, crq); 1788 cfq_update_idle_window(cfqd, cfqq, cic); 1789 1790 cic->last_queue = jiffies; 1791 cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; 1792 1793 if (cfqq == cfqd->active_queue) { 1794 /* 1795 * if we are waiting for a request for this queue, let it rip 1796 * immediately and flag that we must not expire this queue 1797 * just now 1798 */ 1799 if (cfq_cfqq_wait_request(cfqq)) { 1800 cfq_mark_cfqq_must_dispatch(cfqq); 1801 del_timer(&cfqd->idle_slice_timer); 1802 cfq_start_queueing(cfqd, cfqq); 1803 } 1804 } else if (cfq_should_preempt(cfqd, cfqq, crq)) { 1805 /* 1806 * not the active queue - expire current slice if it is 1807 * idle and has expired it's mean thinktime or this new queue 1808 * has some old slice time left and is of higher priority 1809 */ 1810 cfq_preempt_queue(cfqd, cfqq); 1811 cfq_mark_cfqq_must_dispatch(cfqq); 1812 cfq_start_queueing(cfqd, cfqq); 1813 } 1814} 1815 1816static void cfq_insert_request(request_queue_t *q, struct request *rq) 1817{ 1818 struct cfq_data *cfqd = q->elevator->elevator_data; 1819 struct cfq_rq *crq = RQ_DATA(rq); 1820 struct cfq_queue *cfqq = crq->cfq_queue; 1821 1822 cfq_init_prio_data(cfqq); 1823 1824 cfq_add_crq_rb(crq); 1825 1826 list_add_tail(&rq->queuelist, &cfqq->fifo); 1827 1828 if (rq_mergeable(rq)) 1829 cfq_add_crq_hash(cfqd, crq); 1830 1831 cfq_crq_enqueued(cfqd, cfqq, crq); 1832} 1833 1834static void cfq_completed_request(request_queue_t *q, struct request *rq) 1835{ 1836 struct cfq_rq *crq = RQ_DATA(rq); 1837 struct cfq_queue *cfqq = crq->cfq_queue; 1838 struct cfq_data *cfqd = cfqq->cfqd; 1839 const int sync = cfq_crq_is_sync(crq); 1840 unsigned long now; 1841 1842 now = jiffies; 1843 1844 WARN_ON(!cfqd->rq_in_driver); 1845 WARN_ON(!cfqq->on_dispatch[sync]); 1846 cfqd->rq_in_driver--; 1847 cfqq->on_dispatch[sync]--; 1848 1849 if (!cfq_class_idle(cfqq)) 1850 cfqd->last_end_request = now; 1851 1852 if (!cfq_cfqq_dispatched(cfqq)) { 1853 if (cfq_cfqq_on_rr(cfqq)) { 1854 cfqq->service_last = now; 1855 cfq_resort_rr_list(cfqq, 0); 1856 } 1857 } 1858 1859 if (sync) 1860 crq->io_context->last_end_request = now; 1861 1862 /* 1863 * If this is the active queue, check if it needs to be expired, 1864 * or if we want to idle in case it has no pending requests. 1865 */ 1866 if (cfqd->active_queue == cfqq) { 1867 if (time_after(now, cfqq->slice_end)) 1868 cfq_slice_expired(cfqd, 0); 1869 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) { 1870 if (!cfq_arm_slice_timer(cfqd, cfqq)) 1871 cfq_schedule_dispatch(cfqd); 1872 } 1873 } 1874} 1875 1876static struct request * 1877cfq_former_request(request_queue_t *q, struct request *rq) 1878{ 1879 struct cfq_rq *crq = RQ_DATA(rq); 1880 struct rb_node *rbprev = rb_prev(&crq->rb_node); 1881 1882 if (rbprev) 1883 return rb_entry_crq(rbprev)->request; 1884 1885 return NULL; 1886} 1887 1888static struct request * 1889cfq_latter_request(request_queue_t *q, struct request *rq) 1890{ 1891 struct cfq_rq *crq = RQ_DATA(rq); 1892 struct rb_node *rbnext = rb_next(&crq->rb_node); 1893 1894 if (rbnext) 1895 return rb_entry_crq(rbnext)->request; 1896 1897 return NULL; 1898} 1899 1900/* 1901 * we temporarily boost lower priority queues if they are holding fs exclusive 1902 * resources. they are boosted to normal prio (CLASS_BE/4) 1903 */ 1904static void cfq_prio_boost(struct cfq_queue *cfqq) 1905{ 1906 const int ioprio_class = cfqq->ioprio_class; 1907 const int ioprio = cfqq->ioprio; 1908 1909 if (has_fs_excl()) { 1910 /* 1911 * boost idle prio on transactions that would lock out other 1912 * users of the filesystem 1913 */ 1914 if (cfq_class_idle(cfqq)) 1915 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1916 if (cfqq->ioprio > IOPRIO_NORM) 1917 cfqq->ioprio = IOPRIO_NORM; 1918 } else { 1919 /* 1920 * check if we need to unboost the queue 1921 */ 1922 if (cfqq->ioprio_class != cfqq->org_ioprio_class) 1923 cfqq->ioprio_class = cfqq->org_ioprio_class; 1924 if (cfqq->ioprio != cfqq->org_ioprio) 1925 cfqq->ioprio = cfqq->org_ioprio; 1926 } 1927 1928 /* 1929 * refile between round-robin lists if we moved the priority class 1930 */ 1931 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && 1932 cfq_cfqq_on_rr(cfqq)) 1933 cfq_resort_rr_list(cfqq, 0); 1934} 1935 1936static inline int 1937__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1938 struct task_struct *task, int rw) 1939{ 1940 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1941 !cfq_cfqq_must_alloc_slice(cfqq)) { 1942 cfq_mark_cfqq_must_alloc_slice(cfqq); 1943 return ELV_MQUEUE_MUST; 1944 } 1945 1946 return ELV_MQUEUE_MAY; 1947} 1948 1949static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) 1950{ 1951 struct cfq_data *cfqd = q->elevator->elevator_data; 1952 struct task_struct *tsk = current; 1953 struct cfq_queue *cfqq; 1954 1955 /* 1956 * don't force setup of a queue from here, as a call to may_queue 1957 * does not necessarily imply that a request actually will be queued. 1958 * so just lookup a possibly existing queue, or return 'may queue' 1959 * if that fails 1960 */ 1961 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); 1962 if (cfqq) { 1963 cfq_init_prio_data(cfqq); 1964 cfq_prio_boost(cfqq); 1965 1966 return __cfq_may_queue(cfqd, cfqq, tsk, rw); 1967 } 1968 1969 return ELV_MQUEUE_MAY; 1970} 1971 1972static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) 1973{ 1974 struct cfq_data *cfqd = q->elevator->elevator_data; 1975 1976 if (unlikely(cfqd->rq_starved)) { 1977 struct request_list *rl = &q->rq; 1978 1979 smp_mb(); 1980 if (waitqueue_active(&rl->wait[READ])) 1981 wake_up(&rl->wait[READ]); 1982 if (waitqueue_active(&rl->wait[WRITE])) 1983 wake_up(&rl->wait[WRITE]); 1984 } 1985} 1986 1987/* 1988 * queue lock held here 1989 */ 1990static void cfq_put_request(request_queue_t *q, struct request *rq) 1991{ 1992 struct cfq_data *cfqd = q->elevator->elevator_data; 1993 struct cfq_rq *crq = RQ_DATA(rq); 1994 1995 if (crq) { 1996 struct cfq_queue *cfqq = crq->cfq_queue; 1997 const int rw = rq_data_dir(rq); 1998 1999 BUG_ON(!cfqq->allocated[rw]); 2000 cfqq->allocated[rw]--; 2001 2002 put_io_context(crq->io_context->ioc); 2003 2004 mempool_free(crq, cfqd->crq_pool); 2005 rq->elevator_private = NULL; 2006 2007 cfq_check_waiters(q, cfqq); 2008 cfq_put_queue(cfqq); 2009 } 2010} 2011 2012/* 2013 * Allocate cfq data structures associated with this request. 2014 */ 2015static int 2016cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 2017 gfp_t gfp_mask) 2018{ 2019 struct cfq_data *cfqd = q->elevator->elevator_data; 2020 struct task_struct *tsk = current; 2021 struct cfq_io_context *cic; 2022 const int rw = rq_data_dir(rq); 2023 pid_t key = cfq_queue_pid(tsk, rw); 2024 struct cfq_queue *cfqq; 2025 struct cfq_rq *crq; 2026 unsigned long flags; 2027 int is_sync = key != CFQ_KEY_ASYNC; 2028 2029 might_sleep_if(gfp_mask & __GFP_WAIT); 2030 2031 cic = cfq_get_io_context(cfqd, gfp_mask); 2032 2033 spin_lock_irqsave(q->queue_lock, flags); 2034 2035 if (!cic) 2036 goto queue_fail; 2037 2038 if (!cic->cfqq[is_sync]) { 2039 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask); 2040 if (!cfqq) 2041 goto queue_fail; 2042 2043 cic->cfqq[is_sync] = cfqq; 2044 } else 2045 cfqq = cic->cfqq[is_sync]; 2046 2047 cfqq->allocated[rw]++; 2048 cfq_clear_cfqq_must_alloc(cfqq); 2049 cfqd->rq_starved = 0; 2050 atomic_inc(&cfqq->ref); 2051 spin_unlock_irqrestore(q->queue_lock, flags); 2052 2053 crq = mempool_alloc(cfqd->crq_pool, gfp_mask); 2054 if (crq) { 2055 RB_CLEAR_NODE(&crq->rb_node); 2056 crq->rb_key = 0; 2057 crq->request = rq; 2058 INIT_HLIST_NODE(&crq->hash); 2059 crq->cfq_queue = cfqq; 2060 crq->io_context = cic; 2061 2062 if (is_sync) 2063 cfq_mark_crq_is_sync(crq); 2064 else 2065 cfq_clear_crq_is_sync(crq); 2066 2067 rq->elevator_private = crq; 2068 return 0; 2069 } 2070 2071 spin_lock_irqsave(q->queue_lock, flags); 2072 cfqq->allocated[rw]--; 2073 if (!(cfqq->allocated[0] + cfqq->allocated[1])) 2074 cfq_mark_cfqq_must_alloc(cfqq); 2075 cfq_put_queue(cfqq); 2076queue_fail: 2077 if (cic) 2078 put_io_context(cic->ioc); 2079 /* 2080 * mark us rq allocation starved. we need to kickstart the process 2081 * ourselves if there are no pending requests that can do it for us. 2082 * that would be an extremely rare OOM situation 2083 */ 2084 cfqd->rq_starved = 1; 2085 cfq_schedule_dispatch(cfqd); 2086 spin_unlock_irqrestore(q->queue_lock, flags); 2087 return 1; 2088} 2089 2090static void cfq_kick_queue(void *data) 2091{ 2092 request_queue_t *q = data; 2093 struct cfq_data *cfqd = q->elevator->elevator_data; 2094 unsigned long flags; 2095 2096 spin_lock_irqsave(q->queue_lock, flags); 2097 2098 if (cfqd->rq_starved) { 2099 struct request_list *rl = &q->rq; 2100 2101 /* 2102 * we aren't guaranteed to get a request after this, but we 2103 * have to be opportunistic 2104 */ 2105 smp_mb(); 2106 if (waitqueue_active(&rl->wait[READ])) 2107 wake_up(&rl->wait[READ]); 2108 if (waitqueue_active(&rl->wait[WRITE])) 2109 wake_up(&rl->wait[WRITE]); 2110 } 2111 2112 blk_remove_plug(q); 2113 q->request_fn(q); 2114 spin_unlock_irqrestore(q->queue_lock, flags); 2115} 2116 2117/* 2118 * Timer running if the active_queue is currently idling inside its time slice 2119 */ 2120static void cfq_idle_slice_timer(unsigned long data) 2121{ 2122 struct cfq_data *cfqd = (struct cfq_data *) data; 2123 struct cfq_queue *cfqq; 2124 unsigned long flags; 2125 2126 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2127 2128 if ((cfqq = cfqd->active_queue) != NULL) { 2129 unsigned long now = jiffies; 2130 2131 /* 2132 * expired 2133 */ 2134 if (time_after(now, cfqq->slice_end)) 2135 goto expire; 2136 2137 /* 2138 * only expire and reinvoke request handler, if there are 2139 * other queues with pending requests 2140 */ 2141 if (!cfqd->busy_queues) 2142 goto out_cont; 2143 2144 /* 2145 * not expired and it has a request pending, let it dispatch 2146 */ 2147 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { 2148 cfq_mark_cfqq_must_dispatch(cfqq); 2149 goto out_kick; 2150 } 2151 } 2152expire: 2153 cfq_slice_expired(cfqd, 0); 2154out_kick: 2155 cfq_schedule_dispatch(cfqd); 2156out_cont: 2157 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2158} 2159 2160/* 2161 * Timer running if an idle class queue is waiting for service 2162 */ 2163static void cfq_idle_class_timer(unsigned long data) 2164{ 2165 struct cfq_data *cfqd = (struct cfq_data *) data; 2166 unsigned long flags, end; 2167 2168 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2169 2170 /* 2171 * race with a non-idle queue, reset timer 2172 */ 2173 end = cfqd->last_end_request + CFQ_IDLE_GRACE; 2174 if (!time_after_eq(jiffies, end)) 2175 mod_timer(&cfqd->idle_class_timer, end); 2176 else 2177 cfq_schedule_dispatch(cfqd); 2178 2179 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2180} 2181 2182static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 2183{ 2184 del_timer_sync(&cfqd->idle_slice_timer); 2185 del_timer_sync(&cfqd->idle_class_timer); 2186 blk_sync_queue(cfqd->queue); 2187} 2188 2189static void cfq_exit_queue(elevator_t *e) 2190{ 2191 struct cfq_data *cfqd = e->elevator_data; 2192 request_queue_t *q = cfqd->queue; 2193 2194 cfq_shutdown_timer_wq(cfqd); 2195 2196 spin_lock(&cfq_exit_lock); 2197 spin_lock_irq(q->queue_lock); 2198 2199 if (cfqd->active_queue) 2200 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 2201 2202 while (!list_empty(&cfqd->cic_list)) { 2203 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 2204 struct cfq_io_context, 2205 queue_list); 2206 if (cic->cfqq[ASYNC]) { 2207 cfq_put_queue(cic->cfqq[ASYNC]); 2208 cic->cfqq[ASYNC] = NULL; 2209 } 2210 if (cic->cfqq[SYNC]) { 2211 cfq_put_queue(cic->cfqq[SYNC]); 2212 cic->cfqq[SYNC] = NULL; 2213 } 2214 cic->key = NULL; 2215 list_del_init(&cic->queue_list); 2216 } 2217 2218 spin_unlock_irq(q->queue_lock); 2219 spin_unlock(&cfq_exit_lock); 2220 2221 cfq_shutdown_timer_wq(cfqd); 2222 2223 mempool_destroy(cfqd->crq_pool); 2224 kfree(cfqd->crq_hash); 2225 kfree(cfqd->cfq_hash); 2226 kfree(cfqd); 2227} 2228 2229static void *cfq_init_queue(request_queue_t *q, elevator_t *e) 2230{ 2231 struct cfq_data *cfqd; 2232 int i; 2233 2234 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); 2235 if (!cfqd) 2236 return NULL; 2237 2238 memset(cfqd, 0, sizeof(*cfqd)); 2239 2240 for (i = 0; i < CFQ_PRIO_LISTS; i++) 2241 INIT_LIST_HEAD(&cfqd->rr_list[i]); 2242 2243 INIT_LIST_HEAD(&cfqd->busy_rr); 2244 INIT_LIST_HEAD(&cfqd->cur_rr); 2245 INIT_LIST_HEAD(&cfqd->idle_rr); 2246 INIT_LIST_HEAD(&cfqd->empty_list); 2247 INIT_LIST_HEAD(&cfqd->cic_list); 2248 2249 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); 2250 if (!cfqd->crq_hash) 2251 goto out_crqhash; 2252 2253 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); 2254 if (!cfqd->cfq_hash) 2255 goto out_cfqhash; 2256 2257 cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool); 2258 if (!cfqd->crq_pool) 2259 goto out_crqpool; 2260 2261 for (i = 0; i < CFQ_MHASH_ENTRIES; i++) 2262 INIT_HLIST_HEAD(&cfqd->crq_hash[i]); 2263 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 2264 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 2265 2266 cfqd->queue = q; 2267 2268 init_timer(&cfqd->idle_slice_timer); 2269 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 2270 cfqd->idle_slice_timer.data = (unsigned long) cfqd; 2271 2272 init_timer(&cfqd->idle_class_timer); 2273 cfqd->idle_class_timer.function = cfq_idle_class_timer; 2274 cfqd->idle_class_timer.data = (unsigned long) cfqd; 2275 2276 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); 2277 2278 cfqd->cfq_queued = cfq_queued; 2279 cfqd->cfq_quantum = cfq_quantum; 2280 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 2281 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 2282 cfqd->cfq_back_max = cfq_back_max; 2283 cfqd->cfq_back_penalty = cfq_back_penalty; 2284 cfqd->cfq_slice[0] = cfq_slice_async; 2285 cfqd->cfq_slice[1] = cfq_slice_sync; 2286 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2287 cfqd->cfq_slice_idle = cfq_slice_idle; 2288 2289 return cfqd; 2290out_crqpool: 2291 kfree(cfqd->cfq_hash); 2292out_cfqhash: 2293 kfree(cfqd->crq_hash); 2294out_crqhash: 2295 kfree(cfqd); 2296 return NULL; 2297} 2298 2299static void cfq_slab_kill(void) 2300{ 2301 if (crq_pool) 2302 kmem_cache_destroy(crq_pool); 2303 if (cfq_pool) 2304 kmem_cache_destroy(cfq_pool); 2305 if (cfq_ioc_pool) 2306 kmem_cache_destroy(cfq_ioc_pool); 2307} 2308 2309static int __init cfq_slab_setup(void) 2310{ 2311 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, 2312 NULL, NULL); 2313 if (!crq_pool) 2314 goto fail; 2315 2316 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, 2317 NULL, NULL); 2318 if (!cfq_pool) 2319 goto fail; 2320 2321 cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", 2322 sizeof(struct cfq_io_context), 0, 0, NULL, NULL); 2323 if (!cfq_ioc_pool) 2324 goto fail; 2325 2326 return 0; 2327fail: 2328 cfq_slab_kill(); 2329 return -ENOMEM; 2330} 2331 2332/* 2333 * sysfs parts below --> 2334 */ 2335 2336static ssize_t 2337cfq_var_show(unsigned int var, char *page) 2338{ 2339 return sprintf(page, "%d\n", var); 2340} 2341 2342static ssize_t 2343cfq_var_store(unsigned int *var, const char *page, size_t count) 2344{ 2345 char *p = (char *) page; 2346 2347 *var = simple_strtoul(p, &p, 10); 2348 return count; 2349} 2350 2351#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ 2352static ssize_t __FUNC(elevator_t *e, char *page) \ 2353{ \ 2354 struct cfq_data *cfqd = e->elevator_data; \ 2355 unsigned int __data = __VAR; \ 2356 if (__CONV) \ 2357 __data = jiffies_to_msecs(__data); \ 2358 return cfq_var_show(__data, (page)); \ 2359} 2360SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2361SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); 2362SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2363SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2364SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); 2365SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); 2366SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); 2367SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); 2368SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); 2369SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); 2370#undef SHOW_FUNCTION 2371 2372#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 2373static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ 2374{ \ 2375 struct cfq_data *cfqd = e->elevator_data; \ 2376 unsigned int __data; \ 2377 int ret = cfq_var_store(&__data, (page), count); \ 2378 if (__data < (MIN)) \ 2379 __data = (MIN); \ 2380 else if (__data > (MAX)) \ 2381 __data = (MAX); \ 2382 if (__CONV) \ 2383 *(__PTR) = msecs_to_jiffies(__data); \ 2384 else \ 2385 *(__PTR) = __data; \ 2386 return ret; \ 2387} 2388STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2389STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); 2390STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2391STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2392STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2393STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); 2394STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); 2395STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); 2396STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); 2397STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); 2398#undef STORE_FUNCTION 2399 2400#define CFQ_ATTR(name) \ 2401 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) 2402 2403static struct elv_fs_entry cfq_attrs[] = { 2404 CFQ_ATTR(quantum), 2405 CFQ_ATTR(queued), 2406 CFQ_ATTR(fifo_expire_sync), 2407 CFQ_ATTR(fifo_expire_async), 2408 CFQ_ATTR(back_seek_max), 2409 CFQ_ATTR(back_seek_penalty), 2410 CFQ_ATTR(slice_sync), 2411 CFQ_ATTR(slice_async), 2412 CFQ_ATTR(slice_async_rq), 2413 CFQ_ATTR(slice_idle), 2414 __ATTR_NULL 2415}; 2416 2417static struct elevator_type iosched_cfq = { 2418 .ops = { 2419 .elevator_merge_fn = cfq_merge, 2420 .elevator_merged_fn = cfq_merged_request, 2421 .elevator_merge_req_fn = cfq_merged_requests, 2422 .elevator_dispatch_fn = cfq_dispatch_requests, 2423 .elevator_add_req_fn = cfq_insert_request, 2424 .elevator_activate_req_fn = cfq_activate_request, 2425 .elevator_deactivate_req_fn = cfq_deactivate_request, 2426 .elevator_queue_empty_fn = cfq_queue_empty, 2427 .elevator_completed_req_fn = cfq_completed_request, 2428 .elevator_former_req_fn = cfq_former_request, 2429 .elevator_latter_req_fn = cfq_latter_request, 2430 .elevator_set_req_fn = cfq_set_request, 2431 .elevator_put_req_fn = cfq_put_request, 2432 .elevator_may_queue_fn = cfq_may_queue, 2433 .elevator_init_fn = cfq_init_queue, 2434 .elevator_exit_fn = cfq_exit_queue, 2435 .trim = cfq_trim, 2436 }, 2437 .elevator_attrs = cfq_attrs, 2438 .elevator_name = "cfq", 2439 .elevator_owner = THIS_MODULE, 2440}; 2441 2442static int __init cfq_init(void) 2443{ 2444 int ret; 2445 2446 /* 2447 * could be 0 on HZ < 1000 setups 2448 */ 2449 if (!cfq_slice_async) 2450 cfq_slice_async = 1; 2451 if (!cfq_slice_idle) 2452 cfq_slice_idle = 1; 2453 2454 if (cfq_slab_setup()) 2455 return -ENOMEM; 2456 2457 ret = elv_register(&iosched_cfq); 2458 if (ret) 2459 cfq_slab_kill(); 2460 2461 return ret; 2462} 2463 2464static void __exit cfq_exit(void) 2465{ 2466 DECLARE_COMPLETION(all_gone); 2467 elv_unregister(&iosched_cfq); 2468 ioc_gone = &all_gone; 2469 /* ioc_gone's update must be visible before reading ioc_count */ 2470 smp_wmb(); 2471 if (atomic_read(&ioc_count)) 2472 wait_for_completion(ioc_gone); 2473 synchronize_rcu(); 2474 cfq_slab_kill(); 2475} 2476 2477module_init(cfq_init); 2478module_exit(cfq_exit); 2479 2480MODULE_AUTHOR("Jens Axboe"); 2481MODULE_LICENSE("GPL"); 2482MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");