Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm cache policy mq: tweak algorithm that decides when to promote a block

Rather than maintaining a separate promote_threshold variable that we
periodically update we now use the hit count of the oldest clean
block. Also add a fudge factor to discourage demoting dirty blocks.

In some tests this makes a sizeable difference, because the old code
was too eager to demote blocks. For example, device-mapper-test-suite's
git_extract_cache_quick test goes from taking 190 seconds to 142
(linear on spindle takes 250).

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

authored by

Joe Thornber and committed by
Mike Snitzer
b155aa0e 41abc4e1

+54 -29
+3 -3
Documentation/device-mapper/cache-policies.txt
··· 58 58 contiguous I/Os to try to spot when the io is in one of these sequential 59 59 modes. 60 60 61 - Internally the mq policy maintains a promotion threshold variable. If 62 - the hit count of a block not in the cache goes above this threshold it 63 - gets promoted to the cache. The read, write and discard promote adjustment 61 + Internally the mq policy determines a promotion threshold. If the hit 62 + count of a block not in the cache goes above this threshold it gets 63 + promoted to the cache. The read, write and discard promote adjustment 64 64 tunables allow you to tweak the promotion threshold by adding a small 65 65 value based on the io type. They default to 4, 8 and 1 respectively. 66 66 If you're trying to quickly warm a new cache device you may wish to
+51 -26
drivers/md/dm-cache-policy-mq.c
··· 181 181 * Gives us the oldest entry of the lowest popoulated level. If the first 182 182 * level is emptied then we shift down one level. 183 183 */ 184 - static struct list_head *queue_pop(struct queue *q) 184 + static struct list_head *queue_peek(struct queue *q) 185 185 { 186 186 unsigned level; 187 - struct list_head *r; 188 187 189 188 for (level = 0; level < NR_QUEUE_LEVELS; level++) 190 - if (!list_empty(q->qs + level)) { 191 - r = q->qs[level].next; 192 - list_del(r); 193 - 194 - /* have we just emptied the bottom level? */ 195 - if (level == 0 && list_empty(q->qs)) 196 - queue_shift_down(q); 197 - 198 - return r; 199 - } 189 + if (!list_empty(q->qs + level)) 190 + return q->qs[level].next; 200 191 201 192 return NULL; 193 + } 194 + 195 + static struct list_head *queue_pop(struct queue *q) 196 + { 197 + struct list_head *r = queue_peek(q); 198 + 199 + if (r) { 200 + list_del(r); 201 + 202 + /* have we just emptied the bottom level? */ 203 + if (list_empty(q->qs)) 204 + queue_shift_down(q); 205 + } 206 + 207 + return r; 202 208 } 203 209 204 210 static struct list_head *list_pop(struct list_head *lh) ··· 389 383 unsigned generation; 390 384 unsigned generation_period; /* in lookups (will probably change) */ 391 385 392 - /* 393 - * Entries in the pre_cache whose hit count passes the promotion 394 - * threshold move to the cache proper. Working out the correct 395 - * value for the promotion_threshold is crucial to this policy. 
396 - */ 397 - unsigned promote_threshold; 398 - 399 386 unsigned discard_promote_adjustment; 400 387 unsigned read_promote_adjustment; 401 388 unsigned write_promote_adjustment; ··· 405 406 #define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1 406 407 #define DEFAULT_READ_PROMOTE_ADJUSTMENT 4 407 408 #define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8 409 + #define DISCOURAGE_DEMOTING_DIRTY_THRESHOLD 128 408 410 409 411 /*----------------------------------------------------------------*/ 410 412 ··· 518 518 return e; 519 519 } 520 520 521 + static struct entry *peek(struct queue *q) 522 + { 523 + struct list_head *h = queue_peek(q); 524 + return h ? container_of(h, struct entry, list) : NULL; 525 + } 526 + 521 527 /* 522 528 * Has this entry already been updated? 523 529 */ ··· 576 570 break; 577 571 } 578 572 } 579 - 580 - mq->promote_threshold = nr ? total / nr : 1; 581 - if (mq->promote_threshold * nr < total) 582 - mq->promote_threshold++; 583 573 } 584 574 } 585 575 ··· 643 641 } 644 642 645 643 /* 644 + * Entries in the pre_cache whose hit count passes the promotion 645 + * threshold move to the cache proper. Working out the correct 646 + * value for the promotion_threshold is crucial to this policy. 647 + */ 648 + static unsigned promote_threshold(struct mq_policy *mq) 649 + { 650 + struct entry *e; 651 + 652 + if (any_free_cblocks(mq)) 653 + return 0; 654 + 655 + e = peek(&mq->cache_clean); 656 + if (e) 657 + return e->hit_count; 658 + 659 + e = peek(&mq->cache_dirty); 660 + if (e) 661 + return e->hit_count + DISCOURAGE_DEMOTING_DIRTY_THRESHOLD; 662 + 663 + /* This should never happen */ 664 + return 0; 665 + } 666 + 667 + /* 646 668 * We modify the basic promotion_threshold depending on the specific io. 
647 669 * 648 670 * If the origin block has been discarded then there's no cost to copy it ··· 679 653 bool discarded_oblock, int data_dir) 680 654 { 681 655 if (data_dir == READ) 682 - return mq->promote_threshold + mq->read_promote_adjustment; 656 + return promote_threshold(mq) + mq->read_promote_adjustment; 683 657 684 658 if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) { 685 659 /* ··· 689 663 return mq->discard_promote_adjustment; 690 664 } 691 665 692 - return mq->promote_threshold + mq->write_promote_adjustment; 666 + return promote_threshold(mq) + mq->write_promote_adjustment; 693 667 } 694 668 695 669 static bool should_promote(struct mq_policy *mq, struct entry *e, ··· 1256 1230 mq->tick = 0; 1257 1231 mq->hit_count = 0; 1258 1232 mq->generation = 0; 1259 - mq->promote_threshold = 0; 1260 1233 mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT; 1261 1234 mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT; 1262 1235 mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT;