Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm integrity: optimize writing dm-bufio buffers that are partially changed

Rather than write the entire dm-bufio buffer when only a subset is
changed, improve dm-bufio (and dm-integrity) by only writing the subset
of the buffer that changed.

Update dm-integrity to make use of dm-bufio's new
dm_bufio_mark_partial_buffer_dirty() interface.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

Authored by Mikulas Patocka and committed by Mike Snitzer.
1e3b21c6 dc6364b5

+77 -29 (total across all three files)
+67 -28
drivers/md/dm-bufio.c
··· 64 64 #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) 65 65 66 66 /* 67 + * Align buffer writes to this boundary. 68 + * Tests show that SSDs have the highest IOPS when using 4k writes. 69 + */ 70 + #define DM_BUFIO_WRITE_ALIGN 4096 71 + 72 + /* 67 73 * dm_buffer->list_mode 68 74 */ 69 75 #define LIST_CLEAN 0 ··· 155 149 blk_status_t write_error; 156 150 unsigned long state; 157 151 unsigned long last_accessed; 152 + unsigned dirty_start; 153 + unsigned dirty_end; 154 + unsigned write_start; 155 + unsigned write_end; 158 156 struct dm_bufio_client *c; 159 157 struct list_head write_list; 160 158 struct bio bio; ··· 570 560 } 571 561 572 562 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, 573 - unsigned n_sectors, bio_end_io_t *end_io) 563 + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) 574 564 { 575 565 int r; 576 566 struct dm_io_request io_req = { ··· 588 578 589 579 if (b->data_mode != DATA_MODE_VMALLOC) { 590 580 io_req.mem.type = DM_IO_KMEM; 591 - io_req.mem.ptr.addr = b->data; 581 + io_req.mem.ptr.addr = (char *)b->data + offset; 592 582 } else { 593 583 io_req.mem.type = DM_IO_VMA; 594 - io_req.mem.ptr.vma = b->data; 584 + io_req.mem.ptr.vma = (char *)b->data + offset; 595 585 } 596 586 597 587 b->bio.bi_end_io = end_io; ··· 619 609 } 620 610 621 611 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, 622 - unsigned n_sectors, bio_end_io_t *end_io) 612 + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) 623 613 { 624 614 char *ptr; 625 - int len; 615 + unsigned len; 626 616 627 617 bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); 628 618 b->bio.bi_iter.bi_sector = sector; ··· 635 625 b->bio.bi_private = end_io; 636 626 bio_set_op_attrs(&b->bio, rw, 0); 637 627 638 - /* 639 - * We assume that if len >= PAGE_SIZE ptr is page-aligned. 640 - * If len < PAGE_SIZE the buffer doesn't cross page boundary. 
641 - */ 642 - ptr = b->data; 628 + ptr = (char *)b->data + offset; 643 629 len = n_sectors << SECTOR_SHIFT; 644 630 645 - if (len >= PAGE_SIZE) 646 - BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1)); 647 - else 648 - BUG_ON((unsigned long)ptr & (len - 1)); 649 - 650 631 do { 651 - if (!bio_add_page(&b->bio, virt_to_page(ptr), 652 - len < PAGE_SIZE ? len : PAGE_SIZE, 632 + unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len); 633 + if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step, 653 634 offset_in_page(ptr))) { 654 635 BUG_ON(b->c->block_size <= PAGE_SIZE); 655 - use_dmio(b, rw, sector, n_sectors, end_io); 636 + use_dmio(b, rw, sector, n_sectors, offset, end_io); 656 637 return; 657 638 } 658 639 659 - len -= PAGE_SIZE; 660 - ptr += PAGE_SIZE; 640 + len -= this_step; 641 + ptr += this_step; 661 642 } while (len > 0); 662 643 663 644 submit_bio(&b->bio); ··· 658 657 { 659 658 unsigned n_sectors; 660 659 sector_t sector; 661 - 662 - if (rw == WRITE && b->c->write_callback) 663 - b->c->write_callback(b); 660 + unsigned offset, end; 664 661 665 662 sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; 666 - n_sectors = 1 << b->c->sectors_per_block_bits; 663 + 664 + if (rw != WRITE) { 665 + n_sectors = 1 << b->c->sectors_per_block_bits; 666 + offset = 0; 667 + } else { 668 + if (b->c->write_callback) 669 + b->c->write_callback(b); 670 + offset = b->write_start; 671 + end = b->write_end; 672 + offset &= -DM_BUFIO_WRITE_ALIGN; 673 + end += DM_BUFIO_WRITE_ALIGN - 1; 674 + end &= -DM_BUFIO_WRITE_ALIGN; 675 + if (unlikely(end > b->c->block_size)) 676 + end = b->c->block_size; 677 + 678 + sector += offset >> SECTOR_SHIFT; 679 + n_sectors = (end - offset) >> SECTOR_SHIFT; 680 + } 667 681 668 682 if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && 669 683 b->data_mode != DATA_MODE_VMALLOC) 670 - use_inline_bio(b, rw, sector, n_sectors, end_io); 684 + use_inline_bio(b, rw, sector, n_sectors, offset, end_io); 671 685 
else 672 - use_dmio(b, rw, sector, n_sectors, end_io); 686 + use_dmio(b, rw, sector, n_sectors, offset, end_io); 673 687 } 674 688 675 689 /*---------------------------------------------------------------- ··· 735 719 736 720 clear_bit(B_DIRTY, &b->state); 737 721 wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); 722 + 723 + b->write_start = b->dirty_start; 724 + b->write_end = b->dirty_end; 738 725 739 726 if (!write_list) 740 727 submit_io(b, WRITE, write_endio); ··· 1240 1221 } 1241 1222 EXPORT_SYMBOL_GPL(dm_bufio_release); 1242 1223 1243 - void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) 1224 + void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, 1225 + unsigned start, unsigned end) 1244 1226 { 1245 1227 struct dm_bufio_client *c = b->c; 1228 + 1229 + BUG_ON(start >= end); 1230 + BUG_ON(end > b->c->block_size); 1246 1231 1247 1232 dm_bufio_lock(c); 1248 1233 1249 1234 BUG_ON(test_bit(B_READING, &b->state)); 1250 1235 1251 - if (!test_and_set_bit(B_DIRTY, &b->state)) 1236 + if (!test_and_set_bit(B_DIRTY, &b->state)) { 1237 + b->dirty_start = start; 1238 + b->dirty_end = end; 1252 1239 __relink_lru(b, LIST_DIRTY); 1240 + } else { 1241 + if (start < b->dirty_start) 1242 + b->dirty_start = start; 1243 + if (end > b->dirty_end) 1244 + b->dirty_end = end; 1245 + } 1253 1246 1254 1247 dm_bufio_unlock(c); 1248 + } 1249 + EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty); 1250 + 1251 + void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) 1252 + { 1253 + dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size); 1255 1254 } 1256 1255 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); 1257 1256 ··· 1435 1398 wait_on_bit_io(&b->state, B_WRITING, 1436 1399 TASK_UNINTERRUPTIBLE); 1437 1400 set_bit(B_DIRTY, &b->state); 1401 + b->dirty_start = 0; 1402 + b->dirty_end = c->block_size; 1438 1403 __unlink_buffer(b); 1439 1404 __link_buffer(b, new_block, LIST_DIRTY); 1440 1405 } else {
+9
drivers/md/dm-bufio.h
··· 94 94 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); 95 95 96 96 /* 97 + * Mark a part of the buffer dirty. 98 + * 99 + * The specified part of the buffer is scheduled to be written. dm-bufio may 100 + * write the specified part of the buffer or it may write a larger superset. 101 + */ 102 + void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, 103 + unsigned start, unsigned end); 104 + 105 + /* 97 106 * Initiate writing of dirty buffers, without waiting for completion. 98 107 */ 99 108 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
+1 -1
drivers/md/dm-integrity.c
··· 1040 1040 memcpy(tag, dp, to_copy); 1041 1041 } else if (op == TAG_WRITE) { 1042 1042 memcpy(dp, tag, to_copy); 1043 - dm_bufio_mark_buffer_dirty(b); 1043 + dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); 1044 1044 } else { 1045 1045 /* e.g.: op == TAG_CMP */ 1046 1046 if (unlikely(memcmp(dp, tag, to_copy))) {