Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.17-rc2 2481 lines 59 kB view raw
1/* 2 * CFQ, or complete fairness queueing, disk scheduler. 3 * 4 * Based on ideas from a previously unfinished io 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 6 * 7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de> 8 */ 9#include <linux/config.h> 10#include <linux/module.h> 11#include <linux/blkdev.h> 12#include <linux/elevator.h> 13#include <linux/hash.h> 14#include <linux/rbtree.h> 15#include <linux/ioprio.h> 16 17/* 18 * tunables 19 */ 20static const int cfq_quantum = 4; /* max queue in one round of service */ 21static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 22static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 23static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 24static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ 25 26static const int cfq_slice_sync = HZ / 10; 27static int cfq_slice_async = HZ / 25; 28static const int cfq_slice_async_rq = 2; 29static int cfq_slice_idle = HZ / 70; 30 31#define CFQ_IDLE_GRACE (HZ / 10) 32#define CFQ_SLICE_SCALE (5) 33 34#define CFQ_KEY_ASYNC (0) 35 36static DEFINE_RWLOCK(cfq_exit_lock); 37 38/* 39 * for the hash of cfqq inside the cfqd 40 */ 41#define CFQ_QHASH_SHIFT 6 42#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) 43#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) 44 45/* 46 * for the hash of crq inside the cfqq 47 */ 48#define CFQ_MHASH_SHIFT 6 49#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) 50#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) 51#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) 52#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 53#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) 54 55#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 56#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) 57 58#define RQ_DATA(rq) (rq)->elevator_private 59 60/* 61 * rb-tree defines 62 */ 63#define RB_NONE (2) 64#define RB_EMPTY(node) ((node)->rb_node == NULL) 65#define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE 66#define RB_CLEAR(node) do { \ 67 (node)->rb_parent = NULL; \ 68 RB_CLEAR_COLOR((node)); \ 69 (node)->rb_right = NULL; \ 70 (node)->rb_left = NULL; \ 71} while (0) 72#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) 73#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) 74#define rq_rb_key(rq) (rq)->sector 75 76static kmem_cache_t *crq_pool; 77static kmem_cache_t *cfq_pool; 78static kmem_cache_t *cfq_ioc_pool; 79 80static atomic_t ioc_count = ATOMIC_INIT(0); 81static struct completion *ioc_gone; 82 83#define CFQ_PRIO_LISTS IOPRIO_BE_NR 84#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 85#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) 86#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 87 88#define ASYNC (0) 89#define SYNC (1) 90 91#define cfq_cfqq_dispatched(cfqq) \ 92 ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) 93 94#define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) 95 96#define cfq_cfqq_sync(cfqq) \ 97 (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) 98 99#define sample_valid(samples) ((samples) > 80) 100 101/* 102 * Per block device queue structure 103 */ 104struct cfq_data { 105 request_queue_t *queue; 106 107 /* 108 * rr list of queues with requests and the count of them 109 */ 110 struct list_head rr_list[CFQ_PRIO_LISTS]; 111 struct list_head busy_rr; 112 struct list_head cur_rr; 
113 struct list_head idle_rr; 114 unsigned int busy_queues; 115 116 /* 117 * non-ordered list of empty cfqq's 118 */ 119 struct list_head empty_list; 120 121 /* 122 * cfqq lookup hash 123 */ 124 struct hlist_head *cfq_hash; 125 126 /* 127 * global crq hash for all queues 128 */ 129 struct hlist_head *crq_hash; 130 131 unsigned int max_queued; 132 133 mempool_t *crq_pool; 134 135 int rq_in_driver; 136 137 /* 138 * schedule slice state info 139 */ 140 /* 141 * idle window management 142 */ 143 struct timer_list idle_slice_timer; 144 struct work_struct unplug_work; 145 146 struct cfq_queue *active_queue; 147 struct cfq_io_context *active_cic; 148 int cur_prio, cur_end_prio; 149 unsigned int dispatch_slice; 150 151 struct timer_list idle_class_timer; 152 153 sector_t last_sector; 154 unsigned long last_end_request; 155 156 unsigned int rq_starved; 157 158 /* 159 * tunables, see top of file 160 */ 161 unsigned int cfq_quantum; 162 unsigned int cfq_queued; 163 unsigned int cfq_fifo_expire[2]; 164 unsigned int cfq_back_penalty; 165 unsigned int cfq_back_max; 166 unsigned int cfq_slice[2]; 167 unsigned int cfq_slice_async_rq; 168 unsigned int cfq_slice_idle; 169 170 struct list_head cic_list; 171}; 172 173/* 174 * Per process-grouping structure 175 */ 176struct cfq_queue { 177 /* reference count */ 178 atomic_t ref; 179 /* parent cfq_data */ 180 struct cfq_data *cfqd; 181 /* cfqq lookup hash */ 182 struct hlist_node cfq_hash; 183 /* hash key */ 184 unsigned int key; 185 /* on either rr or empty list of cfqd */ 186 struct list_head cfq_list; 187 /* sorted list of pending requests */ 188 struct rb_root sort_list; 189 /* if fifo isn't expired, next request to serve */ 190 struct cfq_rq *next_crq; 191 /* requests queued in sort_list */ 192 int queued[2]; 193 /* currently allocated requests */ 194 int allocated[2]; 195 /* fifo list of requests in sort_list */ 196 struct list_head fifo; 197 198 unsigned long slice_start; 199 unsigned long slice_end; 200 unsigned long slice_left; 201 unsigned long service_last; 202 203 /* number of requests that are on the dispatch list */ 204 int on_dispatch[2]; 205 206 /* io prio of this group */ 207 unsigned short ioprio, org_ioprio; 208 unsigned short ioprio_class, org_ioprio_class; 209 210 /* various state flags, see below */ 211 unsigned int flags; 212}; 213 214struct cfq_rq { 215 struct rb_node rb_node; 216 sector_t rb_key; 217 struct request *request; 218 struct hlist_node hash; 219 220 struct cfq_queue *cfq_queue; 221 struct cfq_io_context *io_context; 222 223 unsigned int crq_flags; 224}; 225 226enum cfqq_state_flags { 227 CFQ_CFQQ_FLAG_on_rr = 0, 228 CFQ_CFQQ_FLAG_wait_request, 229 CFQ_CFQQ_FLAG_must_alloc, 230 CFQ_CFQQ_FLAG_must_alloc_slice, 231 CFQ_CFQQ_FLAG_must_dispatch, 232 CFQ_CFQQ_FLAG_fifo_expire, 233 CFQ_CFQQ_FLAG_idle_window, 234 CFQ_CFQQ_FLAG_prio_changed, 235}; 236 237#define CFQ_CFQQ_FNS(name) \ 238static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 239{ \ 240 cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 241} \ 242static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 243{ \ 244 cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 245} \ 246static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 247{ \ 248 return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 249} 250 251CFQ_CFQQ_FNS(on_rr); 252CFQ_CFQQ_FNS(wait_request); 253CFQ_CFQQ_FNS(must_alloc); 254CFQ_CFQQ_FNS(must_alloc_slice); 255CFQ_CFQQ_FNS(must_dispatch); 256CFQ_CFQQ_FNS(fifo_expire); 257CFQ_CFQQ_FNS(idle_window); 258CFQ_CFQQ_FNS(prio_changed); 
259#undef CFQ_CFQQ_FNS 260 261enum cfq_rq_state_flags { 262 CFQ_CRQ_FLAG_is_sync = 0, 263}; 264 265#define CFQ_CRQ_FNS(name) \ 266static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ 267{ \ 268 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ 269} \ 270static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ 271{ \ 272 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ 273} \ 274static inline int cfq_crq_##name(const struct cfq_rq *crq) \ 275{ \ 276 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ 277} 278 279CFQ_CRQ_FNS(is_sync); 280#undef CFQ_CRQ_FNS 281 282static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 283static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 284static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 285 286#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) 287 288/* 289 * lots of deadline iosched dupes, can be abstracted later... 290 */ 291static inline void cfq_del_crq_hash(struct cfq_rq *crq) 292{ 293 hlist_del_init(&crq->hash); 294} 295 296static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) 297{ 298 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); 299 300 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); 301} 302 303static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) 304{ 305 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; 306 struct hlist_node *entry, *next; 307 308 hlist_for_each_safe(entry, next, hash_list) { 309 struct cfq_rq *crq = list_entry_hash(entry); 310 struct request *__rq = crq->request; 311 312 if (!rq_mergeable(__rq)) { 313 cfq_del_crq_hash(crq); 314 continue; 315 } 316 317 if (rq_hash_key(__rq) == offset) 318 return __rq; 319 } 320 321 return NULL; 322} 323 324/* 325 * scheduler run of queue, if there are requests pending and no one in the 326 * driver that will restart queueing 327 */ 328static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 329{ 330 if (cfqd->busy_queues) 331 kblockd_schedule_work(&cfqd->unplug_work); 332} 333 334static int cfq_queue_empty(request_queue_t *q) 335{ 336 struct cfq_data *cfqd = q->elevator->elevator_data; 337 338 return !cfqd->busy_queues; 339} 340 341static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) 342{ 343 if (rw == READ || process_sync(task)) 344 return task->pid; 345 346 return CFQ_KEY_ASYNC; 347} 348 349/* 350 * Lifted from AS - choose which of crq1 and crq2 that is best served now. 351 * We choose the request that is closest to the head right now. Distance 352 * behind the head is penalized and only allowed to a certain extent. 353 */ 354static struct cfq_rq * 355cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 356{ 357 sector_t last, s1, s2, d1 = 0, d2 = 0; 358 unsigned long back_max; 359#define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ 360#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 361 unsigned wrap = 0; /* bit mask: requests behind the disk head? 
*/ 362 363 if (crq1 == NULL || crq1 == crq2) 364 return crq2; 365 if (crq2 == NULL) 366 return crq1; 367 368 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 369 return crq1; 370 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 371 return crq2; 372 373 s1 = crq1->request->sector; 374 s2 = crq2->request->sector; 375 376 last = cfqd->last_sector; 377 378 /* 379 * by definition, 1KiB is 2 sectors 380 */ 381 back_max = cfqd->cfq_back_max * 2; 382 383 /* 384 * Strict one way elevator _except_ in the case where we allow 385 * short backward seeks which are biased as twice the cost of a 386 * similar forward seek. 387 */ 388 if (s1 >= last) 389 d1 = s1 - last; 390 else if (s1 + back_max >= last) 391 d1 = (last - s1) * cfqd->cfq_back_penalty; 392 else 393 wrap |= CFQ_RQ1_WRAP; 394 395 if (s2 >= last) 396 d2 = s2 - last; 397 else if (s2 + back_max >= last) 398 d2 = (last - s2) * cfqd->cfq_back_penalty; 399 else 400 wrap |= CFQ_RQ2_WRAP; 401 402 /* Found required data */ 403 404 /* 405 * By doing switch() on the bit mask "wrap" we avoid having to 406 * check two variables for all permutations: --> faster! 407 */ 408 switch (wrap) { 409 case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ 410 if (d1 < d2) 411 return crq1; 412 else if (d2 < d1) 413 return crq2; 414 else { 415 if (s1 >= s2) 416 return crq1; 417 else 418 return crq2; 419 } 420 421 case CFQ_RQ2_WRAP: 422 return crq1; 423 case CFQ_RQ1_WRAP: 424 return crq2; 425 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ 426 default: 427 /* 428 * Since both rqs are wrapped, 429 * start with the one that's further behind head 430 * (--> only *one* back seek required), 431 * since back seek takes more time than forward. 432 */ 433 if (s1 <= s2) 434 return crq1; 435 else 436 return crq2; 437 } 438} 439 440/* 441 * would be nice to take fifo expire time into account as well 442 */ 443static struct cfq_rq * 444cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 445 struct cfq_rq *last) 446{ 447 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 448 struct rb_node *rbnext, *rbprev; 449 450 if (!(rbnext = rb_next(&last->rb_node))) { 451 rbnext = rb_first(&cfqq->sort_list); 452 if (rbnext == &last->rb_node) 453 rbnext = NULL; 454 } 455 456 rbprev = rb_prev(&last->rb_node); 457 458 if (rbprev) 459 crq_prev = rb_entry_crq(rbprev); 460 if (rbnext) 461 crq_next = rb_entry_crq(rbnext); 462 463 return cfq_choose_req(cfqd, crq_next, crq_prev); 464} 465 466static void cfq_update_next_crq(struct cfq_rq *crq) 467{ 468 struct cfq_queue *cfqq = crq->cfq_queue; 469 470 if (cfqq->next_crq == crq) 471 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); 472} 473 474static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 475{ 476 struct cfq_data *cfqd = cfqq->cfqd; 477 struct list_head *list, *entry; 478 479 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 480 481 list_del(&cfqq->cfq_list); 482 483 if (cfq_class_rt(cfqq)) 484 list = &cfqd->cur_rr; 485 else if (cfq_class_idle(cfqq)) 486 list = &cfqd->idle_rr; 487 else { 488 /* 489 * if cfqq has requests in flight, don't allow it to be 490 * found in cfq_set_active_queue before it has finished them. 491 * this is done to increase fairness between a process that 492 * has lots of io pending vs one that only generates one 493 * sporadically or synchronously 494 */ 495 if (cfq_cfqq_dispatched(cfqq)) 496 list = &cfqd->busy_rr; 497 else 498 list = &cfqd->rr_list[cfqq->ioprio]; 499 } 500 501 /* 502 * if queue was preempted, just add to front to be fair. busy_rr 503 * isn't sorted. 
504 */ 505 if (preempted || list == &cfqd->busy_rr) { 506 list_add(&cfqq->cfq_list, list); 507 return; 508 } 509 510 /* 511 * sort by when queue was last serviced 512 */ 513 entry = list; 514 while ((entry = entry->prev) != list) { 515 struct cfq_queue *__cfqq = list_entry_cfqq(entry); 516 517 if (!__cfqq->service_last) 518 break; 519 if (time_before(__cfqq->service_last, cfqq->service_last)) 520 break; 521 } 522 523 list_add(&cfqq->cfq_list, entry); 524} 525 526/* 527 * add to busy list of queues for service, trying to be fair in ordering 528 * the pending list according to last request service 529 */ 530static inline void 531cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 532{ 533 BUG_ON(cfq_cfqq_on_rr(cfqq)); 534 cfq_mark_cfqq_on_rr(cfqq); 535 cfqd->busy_queues++; 536 537 cfq_resort_rr_list(cfqq, 0); 538} 539 540static inline void 541cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 542{ 543 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 544 cfq_clear_cfqq_on_rr(cfqq); 545 list_move(&cfqq->cfq_list, &cfqd->empty_list); 546 547 BUG_ON(!cfqd->busy_queues); 548 cfqd->busy_queues--; 549} 550 551/* 552 * rb tree support functions 553 */ 554static inline void cfq_del_crq_rb(struct cfq_rq *crq) 555{ 556 struct cfq_queue *cfqq = crq->cfq_queue; 557 struct cfq_data *cfqd = cfqq->cfqd; 558 const int sync = cfq_crq_is_sync(crq); 559 560 BUG_ON(!cfqq->queued[sync]); 561 cfqq->queued[sync]--; 562 563 cfq_update_next_crq(crq); 564 565 rb_erase(&crq->rb_node, &cfqq->sort_list); 566 RB_CLEAR_COLOR(&crq->rb_node); 567 568 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) 569 cfq_del_cfqq_rr(cfqd, cfqq); 570} 571 572static struct cfq_rq * 573__cfq_add_crq_rb(struct cfq_rq *crq) 574{ 575 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; 576 struct rb_node *parent = NULL; 577 struct cfq_rq *__crq; 578 579 while (*p) { 580 parent = *p; 581 __crq = rb_entry_crq(parent); 582 583 if (crq->rb_key < __crq->rb_key) 584 p = &(*p)->rb_left; 585 else if (crq->rb_key > __crq->rb_key) 586 p = &(*p)->rb_right; 587 else 588 return __crq; 589 } 590 591 rb_link_node(&crq->rb_node, parent, p); 592 return NULL; 593} 594 595static void cfq_add_crq_rb(struct cfq_rq *crq) 596{ 597 struct cfq_queue *cfqq = crq->cfq_queue; 598 struct cfq_data *cfqd = cfqq->cfqd; 599 struct request *rq = crq->request; 600 struct cfq_rq *__alias; 601 602 crq->rb_key = rq_rb_key(rq); 603 cfqq->queued[cfq_crq_is_sync(crq)]++; 604 605 /* 606 * looks a little odd, but the first insert might return an alias. 
607 * if that happens, put the alias on the dispatch list 608 */ 609 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 610 cfq_dispatch_insert(cfqd->queue, __alias); 611 612 rb_insert_color(&crq->rb_node, &cfqq->sort_list); 613 614 if (!cfq_cfqq_on_rr(cfqq)) 615 cfq_add_cfqq_rr(cfqd, cfqq); 616 617 /* 618 * check if this request is a better next-serve candidate 619 */ 620 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 621} 622 623static inline void 624cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 625{ 626 rb_erase(&crq->rb_node, &cfqq->sort_list); 627 cfqq->queued[cfq_crq_is_sync(crq)]--; 628 629 cfq_add_crq_rb(crq); 630} 631 632static struct request * 633cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 634{ 635 struct task_struct *tsk = current; 636 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); 637 struct cfq_queue *cfqq; 638 struct rb_node *n; 639 sector_t sector; 640 641 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); 642 if (!cfqq) 643 goto out; 644 645 sector = bio->bi_sector + bio_sectors(bio); 646 n = cfqq->sort_list.rb_node; 647 while (n) { 648 struct cfq_rq *crq = rb_entry_crq(n); 649 650 if (sector < crq->rb_key) 651 n = n->rb_left; 652 else if (sector > crq->rb_key) 653 n = n->rb_right; 654 else 655 return crq->request; 656 } 657 658out: 659 return NULL; 660} 661 662static void cfq_activate_request(request_queue_t *q, struct request *rq) 663{ 664 struct cfq_data *cfqd = q->elevator->elevator_data; 665 666 cfqd->rq_in_driver++; 667} 668 669static void cfq_deactivate_request(request_queue_t *q, struct request *rq) 670{ 671 struct cfq_data *cfqd = q->elevator->elevator_data; 672 673 WARN_ON(!cfqd->rq_in_driver); 674 cfqd->rq_in_driver--; 675} 676 677static void cfq_remove_request(struct request *rq) 678{ 679 struct cfq_rq *crq = RQ_DATA(rq); 680 681 list_del_init(&rq->queuelist); 682 cfq_del_crq_rb(crq); 683 cfq_del_crq_hash(crq); 684} 685 686static int 687cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) 688{ 689 struct cfq_data *cfqd = q->elevator->elevator_data; 690 struct request *__rq; 691 int ret; 692 693 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); 694 if (__rq && elv_rq_merge_ok(__rq, bio)) { 695 ret = ELEVATOR_BACK_MERGE; 696 goto out; 697 } 698 699 __rq = cfq_find_rq_fmerge(cfqd, bio); 700 if (__rq && elv_rq_merge_ok(__rq, bio)) { 701 ret = ELEVATOR_FRONT_MERGE; 702 goto out; 703 } 704 705 return ELEVATOR_NO_MERGE; 706out: 707 *req = __rq; 708 return ret; 709} 710 711static void cfq_merged_request(request_queue_t *q, struct request *req) 712{ 713 struct cfq_data *cfqd = q->elevator->elevator_data; 714 struct cfq_rq *crq = RQ_DATA(req); 715 716 cfq_del_crq_hash(crq); 717 cfq_add_crq_hash(cfqd, crq); 718 719 if (rq_rb_key(req) != crq->rb_key) { 720 struct cfq_queue *cfqq = crq->cfq_queue; 721 722 cfq_update_next_crq(crq); 723 cfq_reposition_crq_rb(cfqq, crq); 724 } 725} 726 727static void 728cfq_merged_requests(request_queue_t *q, struct request *rq, 729 struct request *next) 730{ 731 cfq_merged_request(q, rq); 732 733 /* 734 * reposition in fifo if next is older than rq 735 */ 736 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 737 time_before(next->start_time, rq->start_time)) 738 list_move(&rq->queuelist, &next->queuelist); 739 740 cfq_remove_request(next); 741} 742 743static inline void 744__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 745{ 746 if (cfqq) { 747 /* 748 * stop potential idle class queues waiting service 749 */ 750 
del_timer(&cfqd->idle_class_timer); 751 752 cfqq->slice_start = jiffies; 753 cfqq->slice_end = 0; 754 cfqq->slice_left = 0; 755 cfq_clear_cfqq_must_alloc_slice(cfqq); 756 cfq_clear_cfqq_fifo_expire(cfqq); 757 } 758 759 cfqd->active_queue = cfqq; 760} 761 762/* 763 * current cfqq expired its slice (or was too idle), select new one 764 */ 765static void 766__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 767 int preempted) 768{ 769 unsigned long now = jiffies; 770 771 if (cfq_cfqq_wait_request(cfqq)) 772 del_timer(&cfqd->idle_slice_timer); 773 774 if (!preempted && !cfq_cfqq_dispatched(cfqq)) { 775 cfqq->service_last = now; 776 cfq_schedule_dispatch(cfqd); 777 } 778 779 cfq_clear_cfqq_must_dispatch(cfqq); 780 cfq_clear_cfqq_wait_request(cfqq); 781 782 /* 783 * store what was left of this slice, if the queue idled out 784 * or was preempted 785 */ 786 if (time_after(cfqq->slice_end, now)) 787 cfqq->slice_left = cfqq->slice_end - now; 788 else 789 cfqq->slice_left = 0; 790 791 if (cfq_cfqq_on_rr(cfqq)) 792 cfq_resort_rr_list(cfqq, preempted); 793 794 if (cfqq == cfqd->active_queue) 795 cfqd->active_queue = NULL; 796 797 if (cfqd->active_cic) { 798 put_io_context(cfqd->active_cic->ioc); 799 cfqd->active_cic = NULL; 800 } 801 802 cfqd->dispatch_slice = 0; 803} 804 805static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) 806{ 807 struct cfq_queue *cfqq = cfqd->active_queue; 808 809 if (cfqq) 810 __cfq_slice_expired(cfqd, cfqq, preempted); 811} 812 813/* 814 * 0 815 * 0,1 816 * 0,1,2 817 * 0,1,2,3 818 * 0,1,2,3,4 819 * 0,1,2,3,4,5 820 * 0,1,2,3,4,5,6 821 * 0,1,2,3,4,5,6,7 822 */ 823static int cfq_get_next_prio_level(struct cfq_data *cfqd) 824{ 825 int prio, wrap; 826 827 prio = -1; 828 wrap = 0; 829 do { 830 int p; 831 832 for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { 833 if (!list_empty(&cfqd->rr_list[p])) { 834 prio = p; 835 break; 836 } 837 } 838 839 if (prio != -1) 840 break; 841 cfqd->cur_prio = 0; 842 if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 843 cfqd->cur_end_prio = 0; 844 if (wrap) 845 break; 846 wrap = 1; 847 } 848 } while (1); 849 850 if (unlikely(prio == -1)) 851 return -1; 852 853 BUG_ON(prio >= CFQ_PRIO_LISTS); 854 855 list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); 856 857 cfqd->cur_prio = prio + 1; 858 if (cfqd->cur_prio > cfqd->cur_end_prio) { 859 cfqd->cur_end_prio = cfqd->cur_prio; 860 cfqd->cur_prio = 0; 861 } 862 if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { 863 cfqd->cur_prio = 0; 864 cfqd->cur_end_prio = 0; 865 } 866 867 return prio; 868} 869 870static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 871{ 872 struct cfq_queue *cfqq = NULL; 873 874 /* 875 * if current list is non-empty, grab first entry. 
if it is empty, 876 * get next prio level and grab first entry then if any are spliced 877 */ 878 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) 879 cfqq = list_entry_cfqq(cfqd->cur_rr.next); 880 881 /* 882 * if we have idle queues and no rt or be queues had pending 883 * requests, either allow immediate service if the grace period 884 * has passed or arm the idle grace timer 885 */ 886 if (!cfqq && !list_empty(&cfqd->idle_rr)) { 887 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; 888 889 if (time_after_eq(jiffies, end)) 890 cfqq = list_entry_cfqq(cfqd->idle_rr.next); 891 else 892 mod_timer(&cfqd->idle_class_timer, end); 893 } 894 895 __cfq_set_active_queue(cfqd, cfqq); 896 return cfqq; 897} 898 899static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 900 901{ 902 struct cfq_io_context *cic; 903 unsigned long sl; 904 905 WARN_ON(!RB_EMPTY(&cfqq->sort_list)); 906 WARN_ON(cfqq != cfqd->active_queue); 907 908 /* 909 * idle is disabled, either manually or by past process history 910 */ 911 if (!cfqd->cfq_slice_idle) 912 return 0; 913 if (!cfq_cfqq_idle_window(cfqq)) 914 return 0; 915 /* 916 * task has exited, don't wait 917 */ 918 cic = cfqd->active_cic; 919 if (!cic || !cic->ioc->task) 920 return 0; 921 922 cfq_mark_cfqq_must_dispatch(cfqq); 923 cfq_mark_cfqq_wait_request(cfqq); 924 925 sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); 926 927 /* 928 * we don't want to idle for seeks, but we do want to allow 929 * fair distribution of slice time for a process doing back-to-back 930 * seeks. so allow a little bit of time for him to submit a new rq 931 */ 932 if (sample_valid(cic->seek_samples) && cic->seek_mean > 131072) 933 sl = 2; 934 935 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 936 return 1; 937} 938 939static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) 940{ 941 struct cfq_data *cfqd = q->elevator->elevator_data; 942 struct cfq_queue *cfqq = crq->cfq_queue; 943 944 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 945 cfq_remove_request(crq->request); 946 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 947 elv_dispatch_sort(q, crq->request); 948} 949 950/* 951 * return expired entry, or NULL to just start from scratch in rbtree 952 */ 953static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) 954{ 955 struct cfq_data *cfqd = cfqq->cfqd; 956 struct request *rq; 957 struct cfq_rq *crq; 958 959 if (cfq_cfqq_fifo_expire(cfqq)) 960 return NULL; 961 962 if (!list_empty(&cfqq->fifo)) { 963 int fifo = cfq_cfqq_class_sync(cfqq); 964 965 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); 966 rq = crq->request; 967 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { 968 cfq_mark_cfqq_fifo_expire(cfqq); 969 return crq; 970 } 971 } 972 973 return NULL; 974} 975 976/* 977 * Scale schedule slice based on io priority. Use the sync time slice only 978 * if a queue is marked sync and has sync io queued. A sync queue with async 979 * io only, should not get full sync slice length. 
980 */ 981static inline int 982cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 983{ 984 const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; 985 986 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 987 988 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); 989} 990 991static inline void 992cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 993{ 994 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 995} 996 997static inline int 998cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 999{ 1000 const int base_rq = cfqd->cfq_slice_async_rq; 1001 1002 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 1003 1004 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 1005} 1006 1007/* 1008 * get next queue for service 1009 */ 1010static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 1011{ 1012 unsigned long now = jiffies; 1013 struct cfq_queue *cfqq; 1014 1015 cfqq = cfqd->active_queue; 1016 if (!cfqq) 1017 goto new_queue; 1018 1019 /* 1020 * slice has expired 1021 */ 1022 if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) 1023 goto expire; 1024 1025 /* 1026 * if queue has requests, dispatch one. if not, check if 1027 * enough slice is left to wait for one 1028 */ 1029 if (!RB_EMPTY(&cfqq->sort_list)) 1030 goto keep_queue; 1031 else if (cfq_cfqq_class_sync(cfqq) && 1032 time_before(now, cfqq->slice_end)) { 1033 if (cfq_arm_slice_timer(cfqd, cfqq)) 1034 return NULL; 1035 } 1036 1037expire: 1038 cfq_slice_expired(cfqd, 0); 1039new_queue: 1040 cfqq = cfq_set_active_queue(cfqd); 1041keep_queue: 1042 return cfqq; 1043} 1044 1045static int 1046__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1047 int max_dispatch) 1048{ 1049 int dispatched = 0; 1050 1051 BUG_ON(RB_EMPTY(&cfqq->sort_list)); 1052 1053 do { 1054 struct cfq_rq *crq; 1055 1056 /* 1057 * follow expired path, else get first next available 1058 */ 1059 if ((crq = cfq_check_fifo(cfqq)) == NULL) 1060 crq = cfqq->next_crq; 1061 1062 /* 1063 * finally, insert request into driver dispatch list 1064 */ 1065 cfq_dispatch_insert(cfqd->queue, crq); 1066 1067 cfqd->dispatch_slice++; 1068 dispatched++; 1069 1070 if (!cfqd->active_cic) { 1071 atomic_inc(&crq->io_context->ioc->refcount); 1072 cfqd->active_cic = crq->io_context; 1073 } 1074 1075 if (RB_EMPTY(&cfqq->sort_list)) 1076 break; 1077 1078 } while (dispatched < max_dispatch); 1079 1080 /* 1081 * if slice end isn't set yet, set it. if at least one request was 1082 * sync, use the sync time slice value 1083 */ 1084 if (!cfqq->slice_end) 1085 cfq_set_prio_slice(cfqd, cfqq); 1086 1087 /* 1088 * expire an async queue immediately if it has used up its slice. idle 1089 * queue always expire after 1 dispatch round. 
1090 */ 1091 if ((!cfq_cfqq_sync(cfqq) && 1092 cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1093 cfq_class_idle(cfqq)) 1094 cfq_slice_expired(cfqd, 0); 1095 1096 return dispatched; 1097} 1098 1099static int 1100cfq_forced_dispatch_cfqqs(struct list_head *list) 1101{ 1102 int dispatched = 0; 1103 struct cfq_queue *cfqq, *next; 1104 struct cfq_rq *crq; 1105 1106 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 1107 while ((crq = cfqq->next_crq)) { 1108 cfq_dispatch_insert(cfqq->cfqd->queue, crq); 1109 dispatched++; 1110 } 1111 BUG_ON(!list_empty(&cfqq->fifo)); 1112 } 1113 return dispatched; 1114} 1115 1116static int 1117cfq_forced_dispatch(struct cfq_data *cfqd) 1118{ 1119 int i, dispatched = 0; 1120 1121 for (i = 0; i < CFQ_PRIO_LISTS; i++) 1122 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); 1123 1124 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); 1125 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); 1126 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); 1127 1128 cfq_slice_expired(cfqd, 0); 1129 1130 BUG_ON(cfqd->busy_queues); 1131 1132 return dispatched; 1133} 1134 1135static int 1136cfq_dispatch_requests(request_queue_t *q, int force) 1137{ 1138 struct cfq_data *cfqd = q->elevator->elevator_data; 1139 struct cfq_queue *cfqq; 1140 1141 if (!cfqd->busy_queues) 1142 return 0; 1143 1144 if (unlikely(force)) 1145 return cfq_forced_dispatch(cfqd); 1146 1147 cfqq = cfq_select_queue(cfqd); 1148 if (cfqq) { 1149 int max_dispatch; 1150 1151 cfq_clear_cfqq_must_dispatch(cfqq); 1152 cfq_clear_cfqq_wait_request(cfqq); 1153 del_timer(&cfqd->idle_slice_timer); 1154 1155 max_dispatch = cfqd->cfq_quantum; 1156 if (cfq_class_idle(cfqq)) 1157 max_dispatch = 1; 1158 1159 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1160 } 1161 1162 return 0; 1163} 1164 1165/* 1166 * task holds one reference to the queue, dropped when task exits. each crq 1167 * in-flight on this queue also holds a reference, dropped when crq is freed. 1168 * 1169 * queue lock must be held here. 
1170 */ 1171static void cfq_put_queue(struct cfq_queue *cfqq) 1172{ 1173 struct cfq_data *cfqd = cfqq->cfqd; 1174 1175 BUG_ON(atomic_read(&cfqq->ref) <= 0); 1176 1177 if (!atomic_dec_and_test(&cfqq->ref)) 1178 return; 1179 1180 BUG_ON(rb_first(&cfqq->sort_list)); 1181 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1182 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1183 1184 if (unlikely(cfqd->active_queue == cfqq)) 1185 __cfq_slice_expired(cfqd, cfqq, 0); 1186 1187 /* 1188 * it's on the empty list and still hashed 1189 */ 1190 list_del(&cfqq->cfq_list); 1191 hlist_del(&cfqq->cfq_hash); 1192 kmem_cache_free(cfq_pool, cfqq); 1193} 1194 1195static inline struct cfq_queue * 1196__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, 1197 const int hashval) 1198{ 1199 struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; 1200 struct hlist_node *entry; 1201 struct cfq_queue *__cfqq; 1202 1203 hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) { 1204 const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); 1205 1206 if (__cfqq->key == key && (__p == prio || !prio)) 1207 return __cfqq; 1208 } 1209 1210 return NULL; 1211} 1212 1213static struct cfq_queue * 1214cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) 1215{ 1216 return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); 1217} 1218 1219static void cfq_free_io_context(struct io_context *ioc) 1220{ 1221 struct cfq_io_context *__cic; 1222 struct rb_node *n; 1223 int freed = 0; 1224 1225 while ((n = rb_first(&ioc->cic_root)) != NULL) { 1226 __cic = rb_entry(n, struct cfq_io_context, rb_node); 1227 rb_erase(&__cic->rb_node, &ioc->cic_root); 1228 kmem_cache_free(cfq_ioc_pool, __cic); 1229 freed++; 1230 } 1231 1232 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) 1233 complete(ioc_gone); 1234} 1235 1236static void cfq_trim(struct io_context *ioc) 1237{ 1238 ioc->set_ioprio = NULL; 1239 cfq_free_io_context(ioc); 1240} 1241 1242/* 1243 * Called with interrupts disabled 1244 */ 1245static void cfq_exit_single_io_context(struct cfq_io_context *cic) 1246{ 1247 struct cfq_data *cfqd = cic->key; 1248 request_queue_t *q; 1249 1250 if (!cfqd) 1251 return; 1252 1253 q = cfqd->queue; 1254 1255 WARN_ON(!irqs_disabled()); 1256 1257 spin_lock(q->queue_lock); 1258 1259 if (cic->cfqq[ASYNC]) { 1260 if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) 1261 __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0); 1262 cfq_put_queue(cic->cfqq[ASYNC]); 1263 cic->cfqq[ASYNC] = NULL; 1264 } 1265 1266 if (cic->cfqq[SYNC]) { 1267 if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) 1268 __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0); 1269 cfq_put_queue(cic->cfqq[SYNC]); 1270 cic->cfqq[SYNC] = NULL; 1271 } 1272 1273 cic->key = NULL; 1274 list_del_init(&cic->queue_list); 1275 spin_unlock(q->queue_lock); 1276} 1277 1278static void cfq_exit_io_context(struct io_context *ioc) 1279{ 1280 struct cfq_io_context *__cic; 1281 unsigned long flags; 1282 struct rb_node *n; 1283 1284 /* 1285 * put the reference this task is holding to the various queues 1286 */ 1287 read_lock_irqsave(&cfq_exit_lock, flags); 1288 1289 n = rb_first(&ioc->cic_root); 1290 while (n != NULL) { 1291 __cic = rb_entry(n, struct cfq_io_context, rb_node); 1292 1293 cfq_exit_single_io_context(__cic); 1294 n = rb_next(n); 1295 } 1296 1297 read_unlock_irqrestore(&cfq_exit_lock, flags); 1298} 1299 1300static struct cfq_io_context * 1301cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1302{ 1303 
struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1304 1305 if (cic) { 1306 RB_CLEAR(&cic->rb_node); 1307 cic->key = NULL; 1308 cic->cfqq[ASYNC] = NULL; 1309 cic->cfqq[SYNC] = NULL; 1310 cic->last_end_request = jiffies; 1311 cic->ttime_total = 0; 1312 cic->ttime_samples = 0; 1313 cic->ttime_mean = 0; 1314 cic->dtor = cfq_free_io_context; 1315 cic->exit = cfq_exit_io_context; 1316 INIT_LIST_HEAD(&cic->queue_list); 1317 atomic_inc(&ioc_count); 1318 } 1319 1320 return cic; 1321} 1322 1323static void cfq_init_prio_data(struct cfq_queue *cfqq) 1324{ 1325 struct task_struct *tsk = current; 1326 int ioprio_class; 1327 1328 if (!cfq_cfqq_prio_changed(cfqq)) 1329 return; 1330 1331 ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); 1332 switch (ioprio_class) { 1333 default: 1334 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); 1335 case IOPRIO_CLASS_NONE: 1336 /* 1337 * no prio set, place us in the middle of the BE classes 1338 */ 1339 cfqq->ioprio = task_nice_ioprio(tsk); 1340 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1341 break; 1342 case IOPRIO_CLASS_RT: 1343 cfqq->ioprio = task_ioprio(tsk); 1344 cfqq->ioprio_class = IOPRIO_CLASS_RT; 1345 break; 1346 case IOPRIO_CLASS_BE: 1347 cfqq->ioprio = task_ioprio(tsk); 1348 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1349 break; 1350 case IOPRIO_CLASS_IDLE: 1351 cfqq->ioprio_class = IOPRIO_CLASS_IDLE; 1352 cfqq->ioprio = 7; 1353 cfq_clear_cfqq_idle_window(cfqq); 1354 break; 1355 } 1356 1357 /* 1358 * keep track of original prio settings in case we have to temporarily 1359 * elevate the priority of this queue 1360 */ 1361 cfqq->org_ioprio = cfqq->ioprio; 1362 cfqq->org_ioprio_class = cfqq->ioprio_class; 1363 1364 if (cfq_cfqq_on_rr(cfqq)) 1365 cfq_resort_rr_list(cfqq, 0); 1366 1367 cfq_clear_cfqq_prio_changed(cfqq); 1368} 1369 1370static inline void changed_ioprio(struct cfq_io_context *cic) 1371{ 1372 struct cfq_data *cfqd = cic->key; 1373 struct cfq_queue *cfqq; 1374 if (cfqd) { 1375 spin_lock(cfqd->queue->queue_lock); 1376 cfqq = cic->cfqq[ASYNC]; 1377 if (cfqq) { 1378 struct cfq_queue *new_cfqq; 1379 new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, 1380 cic->ioc->task, GFP_ATOMIC); 1381 if (new_cfqq) { 1382 cic->cfqq[ASYNC] = new_cfqq; 1383 cfq_put_queue(cfqq); 1384 } 1385 } 1386 cfqq = cic->cfqq[SYNC]; 1387 if (cfqq) { 1388 cfq_mark_cfqq_prio_changed(cfqq); 1389 cfq_init_prio_data(cfqq); 1390 } 1391 spin_unlock(cfqd->queue->queue_lock); 1392 } 1393} 1394 1395/* 1396 * callback from sys_ioprio_set, irqs are disabled 1397 */ 1398static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) 1399{ 1400 struct cfq_io_context *cic; 1401 struct rb_node *n; 1402 1403 write_lock(&cfq_exit_lock); 1404 1405 n = rb_first(&ioc->cic_root); 1406 while (n != NULL) { 1407 cic = rb_entry(n, struct cfq_io_context, rb_node); 1408 1409 changed_ioprio(cic); 1410 n = rb_next(n); 1411 } 1412 1413 write_unlock(&cfq_exit_lock); 1414 1415 return 0; 1416} 1417 1418static struct cfq_queue * 1419cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, 1420 gfp_t gfp_mask) 1421{ 1422 const int hashval = hash_long(key, CFQ_QHASH_SHIFT); 1423 struct cfq_queue *cfqq, *new_cfqq = NULL; 1424 unsigned short ioprio; 1425 1426retry: 1427 ioprio = tsk->ioprio; 1428 cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); 1429 1430 if (!cfqq) { 1431 if (new_cfqq) { 1432 cfqq = new_cfqq; 1433 new_cfqq = NULL; 1434 } else if (gfp_mask & __GFP_WAIT) { 1435 spin_unlock_irq(cfqd->queue->queue_lock); 1436 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 
1437 spin_lock_irq(cfqd->queue->queue_lock); 1438 goto retry; 1439 } else { 1440 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1441 if (!cfqq) 1442 goto out; 1443 } 1444 1445 memset(cfqq, 0, sizeof(*cfqq)); 1446 1447 INIT_HLIST_NODE(&cfqq->cfq_hash); 1448 INIT_LIST_HEAD(&cfqq->cfq_list); 1449 RB_CLEAR_ROOT(&cfqq->sort_list); 1450 INIT_LIST_HEAD(&cfqq->fifo); 1451 1452 cfqq->key = key; 1453 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1454 atomic_set(&cfqq->ref, 0); 1455 cfqq->cfqd = cfqd; 1456 cfqq->service_last = 0; 1457 /* 1458 * set ->slice_left to allow preemption for a new process 1459 */ 1460 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1461 cfq_mark_cfqq_idle_window(cfqq); 1462 cfq_mark_cfqq_prio_changed(cfqq); 1463 cfq_init_prio_data(cfqq); 1464 } 1465 1466 if (new_cfqq) 1467 kmem_cache_free(cfq_pool, new_cfqq); 1468 1469 atomic_inc(&cfqq->ref); 1470out: 1471 WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); 1472 return cfqq; 1473} 1474 1475static void 1476cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) 1477{ 1478 read_lock(&cfq_exit_lock); 1479 rb_erase(&cic->rb_node, &ioc->cic_root); 1480 read_unlock(&cfq_exit_lock); 1481 kmem_cache_free(cfq_ioc_pool, cic); 1482 atomic_dec(&ioc_count); 1483} 1484 1485static struct cfq_io_context * 1486cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) 1487{ 1488 struct rb_node *n; 1489 struct cfq_io_context *cic; 1490 void *k, *key = cfqd; 1491 1492restart: 1493 n = ioc->cic_root.rb_node; 1494 while (n) { 1495 cic = rb_entry(n, struct cfq_io_context, rb_node); 1496 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1497 k = cic->key; 1498 if (unlikely(!k)) { 1499 cfq_drop_dead_cic(ioc, cic); 1500 goto restart; 1501 } 1502 1503 if (key < k) 1504 n = n->rb_left; 1505 else if (key > k) 1506 n = n->rb_right; 1507 else 1508 return cic; 1509 } 1510 1511 return NULL; 1512} 1513 1514static inline void 1515cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, 1516 struct cfq_io_context *cic) 1517{ 1518 struct rb_node **p; 1519 struct rb_node *parent; 1520 struct cfq_io_context *__cic; 1521 void *k; 1522 1523 cic->ioc = ioc; 1524 cic->key = cfqd; 1525 1526 ioc->set_ioprio = cfq_ioc_set_ioprio; 1527restart: 1528 parent = NULL; 1529 p = &ioc->cic_root.rb_node; 1530 while (*p) { 1531 parent = *p; 1532 __cic = rb_entry(parent, struct cfq_io_context, rb_node); 1533 /* ->key must be copied to avoid race with cfq_exit_queue() */ 1534 k = __cic->key; 1535 if (unlikely(!k)) { 1536 cfq_drop_dead_cic(ioc, cic); 1537 goto restart; 1538 } 1539 1540 if (cic->key < k) 1541 p = &(*p)->rb_left; 1542 else if (cic->key > k) 1543 p = &(*p)->rb_right; 1544 else 1545 BUG(); 1546 } 1547 1548 read_lock(&cfq_exit_lock); 1549 rb_link_node(&cic->rb_node, parent, p); 1550 rb_insert_color(&cic->rb_node, &ioc->cic_root); 1551 list_add(&cic->queue_list, &cfqd->cic_list); 1552 read_unlock(&cfq_exit_lock); 1553} 1554 1555/* 1556 * Setup general io context and cfq io context. There can be several cfq 1557 * io contexts per general io context, if this process is doing io to more 1558 * than one device managed by cfq. 
1559 */ 1560static struct cfq_io_context * 1561cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1562{ 1563 struct io_context *ioc = NULL; 1564 struct cfq_io_context *cic; 1565 1566 might_sleep_if(gfp_mask & __GFP_WAIT); 1567 1568 ioc = get_io_context(gfp_mask); 1569 if (!ioc) 1570 return NULL; 1571 1572 cic = cfq_cic_rb_lookup(cfqd, ioc); 1573 if (cic) 1574 goto out; 1575 1576 cic = cfq_alloc_io_context(cfqd, gfp_mask); 1577 if (cic == NULL) 1578 goto err; 1579 1580 cfq_cic_link(cfqd, ioc, cic); 1581out: 1582 return cic; 1583err: 1584 put_io_context(ioc); 1585 return NULL; 1586} 1587 1588static void 1589cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 1590{ 1591 unsigned long elapsed, ttime; 1592 1593 /* 1594 * if this context already has stuff queued, thinktime is from 1595 * last queue not last end 1596 */ 1597#if 0 1598 if (time_after(cic->last_end_request, cic->last_queue)) 1599 elapsed = jiffies - cic->last_end_request; 1600 else 1601 elapsed = jiffies - cic->last_queue; 1602#else 1603 elapsed = jiffies - cic->last_end_request; 1604#endif 1605 1606 ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 1607 1608 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 1609 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 1610 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 1611} 1612 1613static void 1614cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, 1615 struct cfq_rq *crq) 1616{ 1617 sector_t sdist; 1618 u64 total; 1619 1620 if (cic->last_request_pos < crq->request->sector) 1621 sdist = crq->request->sector - cic->last_request_pos; 1622 else 1623 sdist = cic->last_request_pos - crq->request->sector; 1624 1625 /* 1626 * Don't allow the seek distance to get too large from the 1627 * odd fragment, pagein, etc 1628 */ 1629 if (cic->seek_samples <= 60) /* second&third seek */ 1630 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); 1631 else 1632 sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); 1633 1634 cic->seek_samples = (7*cic->seek_samples + 256) / 8; 1635 cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; 1636 total = cic->seek_total + (cic->seek_samples/2); 1637 do_div(total, cic->seek_samples); 1638 cic->seek_mean = (sector_t)total; 1639} 1640 1641/* 1642 * Disable idle window if the process thinks too long or seeks so much that 1643 * it doesn't matter 1644 */ 1645static void 1646cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1647 struct cfq_io_context *cic) 1648{ 1649 int enable_idle = cfq_cfqq_idle_window(cfqq); 1650 1651 if (!cic->ioc->task || !cfqd->cfq_slice_idle) 1652 enable_idle = 0; 1653 else if (sample_valid(cic->ttime_samples)) { 1654 if (cic->ttime_mean > cfqd->cfq_slice_idle) 1655 enable_idle = 0; 1656 else 1657 enable_idle = 1; 1658 } 1659 1660 if (enable_idle) 1661 cfq_mark_cfqq_idle_window(cfqq); 1662 else 1663 cfq_clear_cfqq_idle_window(cfqq); 1664} 1665 1666 1667/* 1668 * Check if new_cfqq should preempt the currently active queue. Return 0 for 1669 * no or if we aren't sure, a 1 will cause a preempt. 
1670 */ 1671static int 1672cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1673 struct cfq_rq *crq) 1674{ 1675 struct cfq_queue *cfqq = cfqd->active_queue; 1676 1677 if (cfq_class_idle(new_cfqq)) 1678 return 0; 1679 1680 if (!cfqq) 1681 return 1; 1682 1683 if (cfq_class_idle(cfqq)) 1684 return 1; 1685 if (!cfq_cfqq_wait_request(new_cfqq)) 1686 return 0; 1687 /* 1688 * if it doesn't have slice left, forget it 1689 */ 1690 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1691 return 0; 1692 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) 1693 return 1; 1694 1695 return 0; 1696} 1697 1698/* 1699 * cfqq preempts the active queue. if we allowed preempt with no slice left, 1700 * let it have half of its nominal slice. 1701 */ 1702static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1703{ 1704 struct cfq_queue *__cfqq, *next; 1705 1706 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) 1707 cfq_resort_rr_list(__cfqq, 1); 1708 1709 if (!cfqq->slice_left) 1710 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1711 1712 cfqq->slice_end = cfqq->slice_left + jiffies; 1713 __cfq_slice_expired(cfqd, cfqq, 1); 1714 __cfq_set_active_queue(cfqd, cfqq); 1715} 1716 1717/* 1718 * should really be a ll_rw_blk.c helper 1719 */ 1720static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1721{ 1722 request_queue_t *q = cfqd->queue; 1723 1724 if (!blk_queue_plugged(q)) 1725 q->request_fn(q); 1726 else 1727 __generic_unplug_device(q); 1728} 1729 1730/* 1731 * Called when a new fs request (crq) is added (to cfqq). Check if there's 1732 * something we should do about it 1733 */ 1734static void 1735cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1736 struct cfq_rq *crq) 1737{ 1738 struct cfq_io_context *cic; 1739 1740 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); 1741 1742 /* 1743 * we never wait for an async request and we don't allow preemption 1744 * of an async request. 
so just return early 1745 */ 1746 if (!cfq_crq_is_sync(crq)) 1747 return; 1748 1749 cic = crq->io_context; 1750 1751 cfq_update_io_thinktime(cfqd, cic); 1752 cfq_update_io_seektime(cfqd, cic, crq); 1753 cfq_update_idle_window(cfqd, cfqq, cic); 1754 1755 cic->last_queue = jiffies; 1756 cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; 1757 1758 if (cfqq == cfqd->active_queue) { 1759 /* 1760 * if we are waiting for a request for this queue, let it rip 1761 * immediately and flag that we must not expire this queue 1762 * just now 1763 */ 1764 if (cfq_cfqq_wait_request(cfqq)) { 1765 cfq_mark_cfqq_must_dispatch(cfqq); 1766 del_timer(&cfqd->idle_slice_timer); 1767 cfq_start_queueing(cfqd, cfqq); 1768 } 1769 } else if (cfq_should_preempt(cfqd, cfqq, crq)) { 1770 /* 1771 * not the active queue - expire current slice if it is 1772 * idle and has expired it's mean thinktime or this new queue 1773 * has some old slice time left and is of higher priority 1774 */ 1775 cfq_preempt_queue(cfqd, cfqq); 1776 cfq_mark_cfqq_must_dispatch(cfqq); 1777 cfq_start_queueing(cfqd, cfqq); 1778 } 1779} 1780 1781static void cfq_insert_request(request_queue_t *q, struct request *rq) 1782{ 1783 struct cfq_data *cfqd = q->elevator->elevator_data; 1784 struct cfq_rq *crq = RQ_DATA(rq); 1785 struct cfq_queue *cfqq = crq->cfq_queue; 1786 1787 cfq_init_prio_data(cfqq); 1788 1789 cfq_add_crq_rb(crq); 1790 1791 list_add_tail(&rq->queuelist, &cfqq->fifo); 1792 1793 if (rq_mergeable(rq)) 1794 cfq_add_crq_hash(cfqd, crq); 1795 1796 cfq_crq_enqueued(cfqd, cfqq, crq); 1797} 1798 1799static void cfq_completed_request(request_queue_t *q, struct request *rq) 1800{ 1801 struct cfq_rq *crq = RQ_DATA(rq); 1802 struct cfq_queue *cfqq = crq->cfq_queue; 1803 struct cfq_data *cfqd = cfqq->cfqd; 1804 const int sync = cfq_crq_is_sync(crq); 1805 unsigned long now; 1806 1807 now = jiffies; 1808 1809 WARN_ON(!cfqd->rq_in_driver); 1810 WARN_ON(!cfqq->on_dispatch[sync]); 1811 cfqd->rq_in_driver--; 1812 cfqq->on_dispatch[sync]--; 1813 1814 if (!cfq_class_idle(cfqq)) 1815 cfqd->last_end_request = now; 1816 1817 if (!cfq_cfqq_dispatched(cfqq)) { 1818 if (cfq_cfqq_on_rr(cfqq)) { 1819 cfqq->service_last = now; 1820 cfq_resort_rr_list(cfqq, 0); 1821 } 1822 cfq_schedule_dispatch(cfqd); 1823 } 1824 1825 if (cfq_crq_is_sync(crq)) 1826 crq->io_context->last_end_request = now; 1827} 1828 1829static struct request * 1830cfq_former_request(request_queue_t *q, struct request *rq) 1831{ 1832 struct cfq_rq *crq = RQ_DATA(rq); 1833 struct rb_node *rbprev = rb_prev(&crq->rb_node); 1834 1835 if (rbprev) 1836 return rb_entry_crq(rbprev)->request; 1837 1838 return NULL; 1839} 1840 1841static struct request * 1842cfq_latter_request(request_queue_t *q, struct request *rq) 1843{ 1844 struct cfq_rq *crq = RQ_DATA(rq); 1845 struct rb_node *rbnext = rb_next(&crq->rb_node); 1846 1847 if (rbnext) 1848 return rb_entry_crq(rbnext)->request; 1849 1850 return NULL; 1851} 1852 1853/* 1854 * we temporarily boost lower priority queues if they are holding fs exclusive 1855 * resources. 
they are boosted to normal prio (CLASS_BE/4) 1856 */ 1857static void cfq_prio_boost(struct cfq_queue *cfqq) 1858{ 1859 const int ioprio_class = cfqq->ioprio_class; 1860 const int ioprio = cfqq->ioprio; 1861 1862 if (has_fs_excl()) { 1863 /* 1864 * boost idle prio on transactions that would lock out other 1865 * users of the filesystem 1866 */ 1867 if (cfq_class_idle(cfqq)) 1868 cfqq->ioprio_class = IOPRIO_CLASS_BE; 1869 if (cfqq->ioprio > IOPRIO_NORM) 1870 cfqq->ioprio = IOPRIO_NORM; 1871 } else { 1872 /* 1873 * check if we need to unboost the queue 1874 */ 1875 if (cfqq->ioprio_class != cfqq->org_ioprio_class) 1876 cfqq->ioprio_class = cfqq->org_ioprio_class; 1877 if (cfqq->ioprio != cfqq->org_ioprio) 1878 cfqq->ioprio = cfqq->org_ioprio; 1879 } 1880 1881 /* 1882 * refile between round-robin lists if we moved the priority class 1883 */ 1884 if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && 1885 cfq_cfqq_on_rr(cfqq)) 1886 cfq_resort_rr_list(cfqq, 0); 1887} 1888 1889static inline int 1890__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1891 struct task_struct *task, int rw) 1892{ 1893#if 1 1894 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1895 !cfq_cfqq_must_alloc_slice(cfqq)) { 1896 cfq_mark_cfqq_must_alloc_slice(cfqq); 1897 return ELV_MQUEUE_MUST; 1898 } 1899 1900 return ELV_MQUEUE_MAY; 1901#else 1902 if (!cfqq || task->flags & PF_MEMALLOC) 1903 return ELV_MQUEUE_MAY; 1904 if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { 1905 if (cfq_cfqq_wait_request(cfqq)) 1906 return ELV_MQUEUE_MUST; 1907 1908 /* 1909 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we 1910 * can quickly flood the queue with writes from a single task 1911 */ 1912 if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { 1913 cfq_mark_cfqq_must_alloc_slice(cfqq); 1914 return ELV_MQUEUE_MUST; 1915 } 1916 1917 return ELV_MQUEUE_MAY; 1918 } 1919 if (cfq_class_idle(cfqq)) 1920 return ELV_MQUEUE_NO; 1921 if (cfqq->allocated[rw] >= cfqd->max_queued) { 1922 struct io_context *ioc = get_io_context(GFP_ATOMIC); 1923 int ret = ELV_MQUEUE_NO; 1924 1925 if (ioc && ioc->nr_batch_requests) 1926 ret = ELV_MQUEUE_MAY; 1927 1928 put_io_context(ioc); 1929 return ret; 1930 } 1931 1932 return ELV_MQUEUE_MAY; 1933#endif 1934} 1935 1936static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) 1937{ 1938 struct cfq_data *cfqd = q->elevator->elevator_data; 1939 struct task_struct *tsk = current; 1940 struct cfq_queue *cfqq; 1941 1942 /* 1943 * don't force setup of a queue from here, as a call to may_queue 1944 * does not necessarily imply that a request actually will be queued. 
1945 * so just lookup a possibly existing queue, or return 'may queue' 1946 * if that fails 1947 */ 1948 cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); 1949 if (cfqq) { 1950 cfq_init_prio_data(cfqq); 1951 cfq_prio_boost(cfqq); 1952 1953 return __cfq_may_queue(cfqd, cfqq, tsk, rw); 1954 } 1955 1956 return ELV_MQUEUE_MAY; 1957} 1958 1959static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) 1960{ 1961 struct cfq_data *cfqd = q->elevator->elevator_data; 1962 struct request_list *rl = &q->rq; 1963 1964 if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { 1965 smp_mb(); 1966 if (waitqueue_active(&rl->wait[READ])) 1967 wake_up(&rl->wait[READ]); 1968 } 1969 1970 if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { 1971 smp_mb(); 1972 if (waitqueue_active(&rl->wait[WRITE])) 1973 wake_up(&rl->wait[WRITE]); 1974 } 1975} 1976 1977/* 1978 * queue lock held here 1979 */ 1980static void cfq_put_request(request_queue_t *q, struct request *rq) 1981{ 1982 struct cfq_data *cfqd = q->elevator->elevator_data; 1983 struct cfq_rq *crq = RQ_DATA(rq); 1984 1985 if (crq) { 1986 struct cfq_queue *cfqq = crq->cfq_queue; 1987 const int rw = rq_data_dir(rq); 1988 1989 BUG_ON(!cfqq->allocated[rw]); 1990 cfqq->allocated[rw]--; 1991 1992 put_io_context(crq->io_context->ioc); 1993 1994 mempool_free(crq, cfqd->crq_pool); 1995 rq->elevator_private = NULL; 1996 1997 cfq_check_waiters(q, cfqq); 1998 cfq_put_queue(cfqq); 1999 } 2000} 2001 2002/* 2003 * Allocate cfq data structures associated with this request. 2004 */ 2005static int 2006cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 2007 gfp_t gfp_mask) 2008{ 2009 struct cfq_data *cfqd = q->elevator->elevator_data; 2010 struct task_struct *tsk = current; 2011 struct cfq_io_context *cic; 2012 const int rw = rq_data_dir(rq); 2013 pid_t key = cfq_queue_pid(tsk, rw); 2014 struct cfq_queue *cfqq; 2015 struct cfq_rq *crq; 2016 unsigned long flags; 2017 int is_sync = key != CFQ_KEY_ASYNC; 2018 2019 might_sleep_if(gfp_mask & __GFP_WAIT); 2020 2021 cic = cfq_get_io_context(cfqd, gfp_mask); 2022 2023 spin_lock_irqsave(q->queue_lock, flags); 2024 2025 if (!cic) 2026 goto queue_fail; 2027 2028 if (!cic->cfqq[is_sync]) { 2029 cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask); 2030 if (!cfqq) 2031 goto queue_fail; 2032 2033 cic->cfqq[is_sync] = cfqq; 2034 } else 2035 cfqq = cic->cfqq[is_sync]; 2036 2037 cfqq->allocated[rw]++; 2038 cfq_clear_cfqq_must_alloc(cfqq); 2039 cfqd->rq_starved = 0; 2040 atomic_inc(&cfqq->ref); 2041 spin_unlock_irqrestore(q->queue_lock, flags); 2042 2043 crq = mempool_alloc(cfqd->crq_pool, gfp_mask); 2044 if (crq) { 2045 RB_CLEAR(&crq->rb_node); 2046 crq->rb_key = 0; 2047 crq->request = rq; 2048 INIT_HLIST_NODE(&crq->hash); 2049 crq->cfq_queue = cfqq; 2050 crq->io_context = cic; 2051 2052 if (is_sync) 2053 cfq_mark_crq_is_sync(crq); 2054 else 2055 cfq_clear_crq_is_sync(crq); 2056 2057 rq->elevator_private = crq; 2058 return 0; 2059 } 2060 2061 spin_lock_irqsave(q->queue_lock, flags); 2062 cfqq->allocated[rw]--; 2063 if (!(cfqq->allocated[0] + cfqq->allocated[1])) 2064 cfq_mark_cfqq_must_alloc(cfqq); 2065 cfq_put_queue(cfqq); 2066queue_fail: 2067 if (cic) 2068 put_io_context(cic->ioc); 2069 /* 2070 * mark us rq allocation starved. we need to kickstart the process 2071 * ourselves if there are no pending requests that can do it for us. 
2072 * that would be an extremely rare OOM situation 2073 */ 2074 cfqd->rq_starved = 1; 2075 cfq_schedule_dispatch(cfqd); 2076 spin_unlock_irqrestore(q->queue_lock, flags); 2077 return 1; 2078} 2079 2080static void cfq_kick_queue(void *data) 2081{ 2082 request_queue_t *q = data; 2083 struct cfq_data *cfqd = q->elevator->elevator_data; 2084 unsigned long flags; 2085 2086 spin_lock_irqsave(q->queue_lock, flags); 2087 2088 if (cfqd->rq_starved) { 2089 struct request_list *rl = &q->rq; 2090 2091 /* 2092 * we aren't guaranteed to get a request after this, but we 2093 * have to be opportunistic 2094 */ 2095 smp_mb(); 2096 if (waitqueue_active(&rl->wait[READ])) 2097 wake_up(&rl->wait[READ]); 2098 if (waitqueue_active(&rl->wait[WRITE])) 2099 wake_up(&rl->wait[WRITE]); 2100 } 2101 2102 blk_remove_plug(q); 2103 q->request_fn(q); 2104 spin_unlock_irqrestore(q->queue_lock, flags); 2105} 2106 2107/* 2108 * Timer running if the active_queue is currently idling inside its time slice 2109 */ 2110static void cfq_idle_slice_timer(unsigned long data) 2111{ 2112 struct cfq_data *cfqd = (struct cfq_data *) data; 2113 struct cfq_queue *cfqq; 2114 unsigned long flags; 2115 2116 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2117 2118 if ((cfqq = cfqd->active_queue) != NULL) { 2119 unsigned long now = jiffies; 2120 2121 /* 2122 * expired 2123 */ 2124 if (time_after(now, cfqq->slice_end)) 2125 goto expire; 2126 2127 /* 2128 * only expire and reinvoke request handler, if there are 2129 * other queues with pending requests 2130 */ 2131 if (!cfqd->busy_queues) { 2132 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); 2133 add_timer(&cfqd->idle_slice_timer); 2134 goto out_cont; 2135 } 2136 2137 /* 2138 * not expired and it has a request pending, let it dispatch 2139 */ 2140 if (!RB_EMPTY(&cfqq->sort_list)) { 2141 cfq_mark_cfqq_must_dispatch(cfqq); 2142 goto out_kick; 2143 } 2144 } 2145expire: 2146 cfq_slice_expired(cfqd, 0); 2147out_kick: 2148 cfq_schedule_dispatch(cfqd); 2149out_cont: 2150 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2151} 2152 2153/* 2154 * Timer running if an idle class queue is waiting for service 2155 */ 2156static void cfq_idle_class_timer(unsigned long data) 2157{ 2158 struct cfq_data *cfqd = (struct cfq_data *) data; 2159 unsigned long flags, end; 2160 2161 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2162 2163 /* 2164 * race with a non-idle queue, reset timer 2165 */ 2166 end = cfqd->last_end_request + CFQ_IDLE_GRACE; 2167 if (!time_after_eq(jiffies, end)) { 2168 cfqd->idle_class_timer.expires = end; 2169 add_timer(&cfqd->idle_class_timer); 2170 } else 2171 cfq_schedule_dispatch(cfqd); 2172 2173 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 2174} 2175 2176static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 2177{ 2178 del_timer_sync(&cfqd->idle_slice_timer); 2179 del_timer_sync(&cfqd->idle_class_timer); 2180 blk_sync_queue(cfqd->queue); 2181} 2182 2183static void cfq_exit_queue(elevator_t *e) 2184{ 2185 struct cfq_data *cfqd = e->elevator_data; 2186 request_queue_t *q = cfqd->queue; 2187 2188 cfq_shutdown_timer_wq(cfqd); 2189 2190 write_lock(&cfq_exit_lock); 2191 spin_lock_irq(q->queue_lock); 2192 2193 if (cfqd->active_queue) 2194 __cfq_slice_expired(cfqd, cfqd->active_queue, 0); 2195 2196 while (!list_empty(&cfqd->cic_list)) { 2197 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 2198 struct cfq_io_context, 2199 queue_list); 2200 if (cic->cfqq[ASYNC]) { 2201 cfq_put_queue(cic->cfqq[ASYNC]); 2202 cic->cfqq[ASYNC] = 
static void cfq_exit_queue(elevator_t *e)
{
        struct cfq_data *cfqd = e->elevator_data;
        request_queue_t *q = cfqd->queue;

        cfq_shutdown_timer_wq(cfqd);

        write_lock(&cfq_exit_lock);
        spin_lock_irq(q->queue_lock);

        if (cfqd->active_queue)
                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);

        while (!list_empty(&cfqd->cic_list)) {
                struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
                                                        struct cfq_io_context,
                                                        queue_list);
                if (cic->cfqq[ASYNC]) {
                        cfq_put_queue(cic->cfqq[ASYNC]);
                        cic->cfqq[ASYNC] = NULL;
                }
                if (cic->cfqq[SYNC]) {
                        cfq_put_queue(cic->cfqq[SYNC]);
                        cic->cfqq[SYNC] = NULL;
                }
                cic->key = NULL;
                list_del_init(&cic->queue_list);
        }

        spin_unlock_irq(q->queue_lock);
        write_unlock(&cfq_exit_lock);

        cfq_shutdown_timer_wq(cfqd);

        mempool_destroy(cfqd->crq_pool);
        kfree(cfqd->crq_hash);
        kfree(cfqd->cfq_hash);
        kfree(cfqd);
}

static int cfq_init_queue(request_queue_t *q, elevator_t *e)
{
        struct cfq_data *cfqd;
        int i;

        cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
        if (!cfqd)
                return -ENOMEM;

        memset(cfqd, 0, sizeof(*cfqd));

        for (i = 0; i < CFQ_PRIO_LISTS; i++)
                INIT_LIST_HEAD(&cfqd->rr_list[i]);

        INIT_LIST_HEAD(&cfqd->busy_rr);
        INIT_LIST_HEAD(&cfqd->cur_rr);
        INIT_LIST_HEAD(&cfqd->idle_rr);
        INIT_LIST_HEAD(&cfqd->empty_list);
        INIT_LIST_HEAD(&cfqd->cic_list);

        cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
        if (!cfqd->crq_hash)
                goto out_crqhash;

        cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
        if (!cfqd->cfq_hash)
                goto out_cfqhash;

        cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
        if (!cfqd->crq_pool)
                goto out_crqpool;

        for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
        for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);

        e->elevator_data = cfqd;

        cfqd->queue = q;

        cfqd->max_queued = q->nr_requests / 4;
        q->nr_batching = cfq_queued;

        init_timer(&cfqd->idle_slice_timer);
        cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
        cfqd->idle_slice_timer.data = (unsigned long) cfqd;

        init_timer(&cfqd->idle_class_timer);
        cfqd->idle_class_timer.function = cfq_idle_class_timer;
        cfqd->idle_class_timer.data = (unsigned long) cfqd;

        INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);

        cfqd->cfq_queued = cfq_queued;
        cfqd->cfq_quantum = cfq_quantum;
        cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
        cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
        cfqd->cfq_back_max = cfq_back_max;
        cfqd->cfq_back_penalty = cfq_back_penalty;
        cfqd->cfq_slice[0] = cfq_slice_async;
        cfqd->cfq_slice[1] = cfq_slice_sync;
        cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
        cfqd->cfq_slice_idle = cfq_slice_idle;

        return 0;
out_crqpool:
        kfree(cfqd->cfq_hash);
out_cfqhash:
        kfree(cfqd->crq_hash);
out_crqhash:
        kfree(cfqd);
        return -ENOMEM;
}

static void cfq_slab_kill(void)
{
        if (crq_pool)
                kmem_cache_destroy(crq_pool);
        if (cfq_pool)
                kmem_cache_destroy(cfq_pool);
        if (cfq_ioc_pool)
                kmem_cache_destroy(cfq_ioc_pool);
}

static int __init cfq_slab_setup(void)
{
        crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
                                        NULL, NULL);
        if (!crq_pool)
                goto fail;

        cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
                                        NULL, NULL);
        if (!cfq_pool)
                goto fail;

        cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
                        sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
        if (!cfq_ioc_pool)
                goto fail;

        return 0;
fail:
        cfq_slab_kill();
        return -ENOMEM;
}

/*
 * sysfs parts below -->
 */
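/*
 * Added note: the attributes defined below are exported by the elevator core
 * under /sys/block/<dev>/queue/iosched/ while cfq is the active scheduler for
 * that device. Illustrative shell session (device name is only an example):
 *
 *	# cat /sys/block/sda/queue/iosched/slice_idle
 *	# echo 4 > /sys/block/sda/queue/iosched/quantum
 *
 * Values flagged with __CONV in the SHOW/STORE macros are converted between
 * milliseconds (as seen in sysfs) and jiffies (as stored internally).
 */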
static ssize_t
cfq_var_show(unsigned int var, char *page)
{
        return sprintf(page, "%d\n", var);
}

static ssize_t
cfq_var_store(unsigned int *var, const char *page, size_t count)
{
        char *p = (char *) page;

        *var = simple_strtoul(p, &p, 10);
        return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(elevator_t *e, char *page)			\
{									\
        struct cfq_data *cfqd = e->elevator_data;			\
        unsigned int __data = __VAR;					\
        if (__CONV)							\
                __data = jiffies_to_msecs(__data);			\
        return cfq_var_show(__data, (page));				\
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
{									\
        struct cfq_data *cfqd = e->elevator_data;			\
        unsigned int __data;						\
        int ret = cfq_var_store(&__data, (page), count);		\
        if (__data < (MIN))						\
                __data = (MIN);						\
        else if (__data > (MAX))					\
                __data = (MAX);						\
        if (__CONV)							\
                *(__PTR) = msecs_to_jiffies(__data);			\
        else								\
                *(__PTR) = __data;					\
        return ret;							\
}
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
#undef STORE_FUNCTION
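/*
 * Added note: for reference, SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0)
 * above expands to roughly the following (shown here only as a comment so the
 * generated function is not duplicated):
 *
 *	static ssize_t cfq_quantum_show(elevator_t *e, char *page)
 *	{
 *		struct cfq_data *cfqd = e->elevator_data;
 *		unsigned int __data = cfqd->cfq_quantum;
 *		if (0)
 *			__data = jiffies_to_msecs(__data);
 *		return cfq_var_show(__data, (page));
 *	}
 *
 * The matching STORE_FUNCTION expansion clamps the written value to
 * [MIN, MAX] and, when __CONV is set, converts milliseconds to jiffies
 * before storing it.
 */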
#define CFQ_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)

static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(quantum),
        CFQ_ATTR(queued),
        CFQ_ATTR(fifo_expire_sync),
        CFQ_ATTR(fifo_expire_async),
        CFQ_ATTR(back_seek_max),
        CFQ_ATTR(back_seek_penalty),
        CFQ_ATTR(slice_sync),
        CFQ_ATTR(slice_async),
        CFQ_ATTR(slice_async_rq),
        CFQ_ATTR(slice_idle),
        __ATTR_NULL
};

static struct elevator_type iosched_cfq = {
        .ops = {
                .elevator_merge_fn =		cfq_merge,
                .elevator_merged_fn =		cfq_merged_request,
                .elevator_merge_req_fn =	cfq_merged_requests,
                .elevator_dispatch_fn =		cfq_dispatch_requests,
                .elevator_add_req_fn =		cfq_insert_request,
                .elevator_activate_req_fn =	cfq_activate_request,
                .elevator_deactivate_req_fn =	cfq_deactivate_request,
                .elevator_queue_empty_fn =	cfq_queue_empty,
                .elevator_completed_req_fn =	cfq_completed_request,
                .elevator_former_req_fn =	cfq_former_request,
                .elevator_latter_req_fn =	cfq_latter_request,
                .elevator_set_req_fn =		cfq_set_request,
                .elevator_put_req_fn =		cfq_put_request,
                .elevator_may_queue_fn =	cfq_may_queue,
                .elevator_init_fn =		cfq_init_queue,
                .elevator_exit_fn =		cfq_exit_queue,
                .trim =				cfq_trim,
        },
        .elevator_attrs =	cfq_attrs,
        .elevator_name =	"cfq",
        .elevator_owner =	THIS_MODULE,
};

static int __init cfq_init(void)
{
        int ret;

        /*
         * could be 0 on HZ < 1000 setups
         */
        if (!cfq_slice_async)
                cfq_slice_async = 1;
        if (!cfq_slice_idle)
                cfq_slice_idle = 1;

        if (cfq_slab_setup())
                return -ENOMEM;

        ret = elv_register(&iosched_cfq);
        if (ret)
                cfq_slab_kill();

        return ret;
}

static void __exit cfq_exit(void)
{
        DECLARE_COMPLETION(all_gone);
        elv_unregister(&iosched_cfq);
        ioc_gone = &all_gone;
        /* ioc_gone's update must be visible before reading ioc_count */
        smp_wmb();
        if (atomic_read(&ioc_count))
                wait_for_completion(ioc_gone);
        synchronize_rcu();
        cfq_slab_kill();
}

module_init(cfq_init);
module_exit(cfq_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
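/*
 * Added usage note: once this module is registered, cfq can be selected
 * per-device at runtime, e.g.
 *
 *	# echo cfq > /sys/block/sda/queue/scheduler
 *
 * or made the boot-time default with the "elevator=cfq" kernel parameter.
 * The device name above is only an example.
 */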