Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm integrity: optimize writing dm-bufio buffers that are partially changed

Rather than write the entire dm-bufio buffer when only a subset is
changed, improve dm-bufio (and dm-integrity) by only writing the subset
of the buffer that changed.

Update dm-integrity to make use of dm-bufio's new
dm_bufio_mark_partial_buffer_dirty() interface.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

Authored by Mikulas Patocka; committed by Mike Snitzer
1e3b21c6 dc6364b5

+77 -29
+67 -28
drivers/md/dm-bufio.c
··· 64 #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) 65 66 /* 67 * dm_buffer->list_mode 68 */ 69 #define LIST_CLEAN 0 ··· 155 blk_status_t write_error; 156 unsigned long state; 157 unsigned long last_accessed; 158 struct dm_bufio_client *c; 159 struct list_head write_list; 160 struct bio bio; ··· 570 } 571 572 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, 573 - unsigned n_sectors, bio_end_io_t *end_io) 574 { 575 int r; 576 struct dm_io_request io_req = { ··· 588 589 if (b->data_mode != DATA_MODE_VMALLOC) { 590 io_req.mem.type = DM_IO_KMEM; 591 - io_req.mem.ptr.addr = b->data; 592 } else { 593 io_req.mem.type = DM_IO_VMA; 594 - io_req.mem.ptr.vma = b->data; 595 } 596 597 b->bio.bi_end_io = end_io; ··· 619 } 620 621 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, 622 - unsigned n_sectors, bio_end_io_t *end_io) 623 { 624 char *ptr; 625 - int len; 626 627 bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); 628 b->bio.bi_iter.bi_sector = sector; ··· 635 b->bio.bi_private = end_io; 636 bio_set_op_attrs(&b->bio, rw, 0); 637 638 - /* 639 - * We assume that if len >= PAGE_SIZE ptr is page-aligned. 640 - * If len < PAGE_SIZE the buffer doesn't cross page boundary. 641 - */ 642 - ptr = b->data; 643 len = n_sectors << SECTOR_SHIFT; 644 645 - if (len >= PAGE_SIZE) 646 - BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1)); 647 - else 648 - BUG_ON((unsigned long)ptr & (len - 1)); 649 - 650 do { 651 - if (!bio_add_page(&b->bio, virt_to_page(ptr), 652 - len < PAGE_SIZE ? 
len : PAGE_SIZE, 653 offset_in_page(ptr))) { 654 BUG_ON(b->c->block_size <= PAGE_SIZE); 655 - use_dmio(b, rw, sector, n_sectors, end_io); 656 return; 657 } 658 659 - len -= PAGE_SIZE; 660 - ptr += PAGE_SIZE; 661 } while (len > 0); 662 663 submit_bio(&b->bio); ··· 658 { 659 unsigned n_sectors; 660 sector_t sector; 661 - 662 - if (rw == WRITE && b->c->write_callback) 663 - b->c->write_callback(b); 664 665 sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; 666 - n_sectors = 1 << b->c->sectors_per_block_bits; 667 668 if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && 669 b->data_mode != DATA_MODE_VMALLOC) 670 - use_inline_bio(b, rw, sector, n_sectors, end_io); 671 else 672 - use_dmio(b, rw, sector, n_sectors, end_io); 673 } 674 675 /*---------------------------------------------------------------- ··· 735 736 clear_bit(B_DIRTY, &b->state); 737 wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); 738 739 if (!write_list) 740 submit_io(b, WRITE, write_endio); ··· 1240 } 1241 EXPORT_SYMBOL_GPL(dm_bufio_release); 1242 1243 - void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) 1244 { 1245 struct dm_bufio_client *c = b->c; 1246 1247 dm_bufio_lock(c); 1248 1249 BUG_ON(test_bit(B_READING, &b->state)); 1250 1251 - if (!test_and_set_bit(B_DIRTY, &b->state)) 1252 __relink_lru(b, LIST_DIRTY); 1253 1254 dm_bufio_unlock(c); 1255 } 1256 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); 1257 ··· 1435 wait_on_bit_io(&b->state, B_WRITING, 1436 TASK_UNINTERRUPTIBLE); 1437 set_bit(B_DIRTY, &b->state); 1438 __unlink_buffer(b); 1439 __link_buffer(b, new_block, LIST_DIRTY); 1440 } else {
··· 64 #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) 65 66 /* 67 + * Align buffer writes to this boundary. 68 + * Tests show that SSDs have the highest IOPS when using 4k writes. 69 + */ 70 + #define DM_BUFIO_WRITE_ALIGN 4096 71 + 72 + /* 73 * dm_buffer->list_mode 74 */ 75 #define LIST_CLEAN 0 ··· 149 blk_status_t write_error; 150 unsigned long state; 151 unsigned long last_accessed; 152 + unsigned dirty_start; 153 + unsigned dirty_end; 154 + unsigned write_start; 155 + unsigned write_end; 156 struct dm_bufio_client *c; 157 struct list_head write_list; 158 struct bio bio; ··· 560 } 561 562 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, 563 + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) 564 { 565 int r; 566 struct dm_io_request io_req = { ··· 578 579 if (b->data_mode != DATA_MODE_VMALLOC) { 580 io_req.mem.type = DM_IO_KMEM; 581 + io_req.mem.ptr.addr = (char *)b->data + offset; 582 } else { 583 io_req.mem.type = DM_IO_VMA; 584 + io_req.mem.ptr.vma = (char *)b->data + offset; 585 } 586 587 b->bio.bi_end_io = end_io; ··· 609 } 610 611 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, 612 + unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) 613 { 614 char *ptr; 615 + unsigned len; 616 617 bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); 618 b->bio.bi_iter.bi_sector = sector; ··· 625 b->bio.bi_private = end_io; 626 bio_set_op_attrs(&b->bio, rw, 0); 627 628 + ptr = (char *)b->data + offset; 629 len = n_sectors << SECTOR_SHIFT; 630 631 do { 632 + unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len); 633 + if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step, 634 offset_in_page(ptr))) { 635 BUG_ON(b->c->block_size <= PAGE_SIZE); 636 + use_dmio(b, rw, sector, n_sectors, offset, end_io); 637 return; 638 } 639 640 + len -= this_step; 641 + ptr += this_step; 642 } while (len > 0); 643 644 submit_bio(&b->bio); ··· 657 { 658 unsigned n_sectors; 659 sector_t sector; 
660 + unsigned offset, end; 661 662 sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; 663 + 664 + if (rw != WRITE) { 665 + n_sectors = 1 << b->c->sectors_per_block_bits; 666 + offset = 0; 667 + } else { 668 + if (b->c->write_callback) 669 + b->c->write_callback(b); 670 + offset = b->write_start; 671 + end = b->write_end; 672 + offset &= -DM_BUFIO_WRITE_ALIGN; 673 + end += DM_BUFIO_WRITE_ALIGN - 1; 674 + end &= -DM_BUFIO_WRITE_ALIGN; 675 + if (unlikely(end > b->c->block_size)) 676 + end = b->c->block_size; 677 + 678 + sector += offset >> SECTOR_SHIFT; 679 + n_sectors = (end - offset) >> SECTOR_SHIFT; 680 + } 681 682 if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && 683 b->data_mode != DATA_MODE_VMALLOC) 684 + use_inline_bio(b, rw, sector, n_sectors, offset, end_io); 685 else 686 + use_dmio(b, rw, sector, n_sectors, offset, end_io); 687 } 688 689 /*---------------------------------------------------------------- ··· 719 720 clear_bit(B_DIRTY, &b->state); 721 wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); 722 + 723 + b->write_start = b->dirty_start; 724 + b->write_end = b->dirty_end; 725 726 if (!write_list) 727 submit_io(b, WRITE, write_endio); ··· 1221 } 1222 EXPORT_SYMBOL_GPL(dm_bufio_release); 1223 1224 + void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, 1225 + unsigned start, unsigned end) 1226 { 1227 struct dm_bufio_client *c = b->c; 1228 + 1229 + BUG_ON(start >= end); 1230 + BUG_ON(end > b->c->block_size); 1231 1232 dm_bufio_lock(c); 1233 1234 BUG_ON(test_bit(B_READING, &b->state)); 1235 1236 + if (!test_and_set_bit(B_DIRTY, &b->state)) { 1237 + b->dirty_start = start; 1238 + b->dirty_end = end; 1239 __relink_lru(b, LIST_DIRTY); 1240 + } else { 1241 + if (start < b->dirty_start) 1242 + b->dirty_start = start; 1243 + if (end > b->dirty_end) 1244 + b->dirty_end = end; 1245 + } 1246 1247 dm_bufio_unlock(c); 1248 + } 1249 + EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty); 1250 + 1251 + void 
dm_bufio_mark_buffer_dirty(struct dm_buffer *b) 1252 + { 1253 + dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size); 1254 } 1255 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); 1256 ··· 1398 wait_on_bit_io(&b->state, B_WRITING, 1399 TASK_UNINTERRUPTIBLE); 1400 set_bit(B_DIRTY, &b->state); 1401 + b->dirty_start = 0; 1402 + b->dirty_end = c->block_size; 1403 __unlink_buffer(b); 1404 __link_buffer(b, new_block, LIST_DIRTY); 1405 } else {
+9
drivers/md/dm-bufio.h
··· 94 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); 95 96 /* 97 * Initiate writing of dirty buffers, without waiting for completion. 98 */ 99 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
··· 94 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); 95 96 /* 97 + * Mark a part of the buffer dirty. 98 + * 99 + * The specified part of the buffer is scheduled to be written. dm-bufio may 100 + * write the specified part of the buffer or it may write a larger superset. 101 + */ 102 + void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, 103 + unsigned start, unsigned end); 104 + 105 + /* 106 * Initiate writing of dirty buffers, without waiting for completion. 107 */ 108 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
+1 -1
drivers/md/dm-integrity.c
··· 1040 memcpy(tag, dp, to_copy); 1041 } else if (op == TAG_WRITE) { 1042 memcpy(dp, tag, to_copy); 1043 - dm_bufio_mark_buffer_dirty(b); 1044 } else { 1045 /* e.g.: op == TAG_CMP */ 1046 if (unlikely(memcmp(dp, tag, to_copy))) {
··· 1040 memcpy(tag, dp, to_copy); 1041 } else if (op == TAG_WRITE) { 1042 memcpy(dp, tag, to_copy); 1043 + dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); 1044 } else { 1045 /* e.g.: op == TAG_CMP */ 1046 if (unlikely(memcmp(dp, tag, to_copy))) {