Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm snapshot: skip reading origin when overwriting complete chunk

If we write a full chunk in the snapshot, skip reading the origin device
because the whole chunk will be overwritten anyway.

This patch changes the snapshot write logic when a full chunk is written.
In this case:
1. allocate the exception
2. dispatch the bio (but don't report the bio completion to device mapper)
3. write the exception record
4. report bio completed

Callbacks must be done through the kcopyd thread, because callbacks must not
race with each other. So we create two new functions:

dm_kcopyd_prepare_callback: allocate a job structure and prepare the callback.
(This function must not be called from interrupt context.)

dm_kcopyd_do_callback: submit callback.
(This function may be called from interrupt context.)

Performance test (on snapshots with 4k chunk size):
without the patch:
non-direct-io sequential write (dd): 17.7MB/s
direct-io sequential write (dd): 20.9MB/s
non-direct-io random write (mkfs.ext2): 0.44s

with the patch:
non-direct-io sequential write (dd): 26.5MB/s
direct-io sequential write (dd): 33.2MB/s
non-direct-io random write (mkfs.ext2): 0.27s

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Authored by Mikulas Patocka and committed by Alasdair G Kergon
a6e50b40 d5b9dd04

+103 -3
+31
drivers/md/dm-kcopyd.c
··· 617 617 } 618 618 EXPORT_SYMBOL(dm_kcopyd_copy); 619 619 620 + void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, 621 + dm_kcopyd_notify_fn fn, void *context) 622 + { 623 + struct kcopyd_job *job; 624 + 625 + job = mempool_alloc(kc->job_pool, GFP_NOIO); 626 + 627 + memset(job, 0, sizeof(struct kcopyd_job)); 628 + job->kc = kc; 629 + job->fn = fn; 630 + job->context = context; 631 + 632 + atomic_inc(&kc->nr_jobs); 633 + 634 + return job; 635 + } 636 + EXPORT_SYMBOL(dm_kcopyd_prepare_callback); 637 + 638 + void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) 639 + { 640 + struct kcopyd_job *job = j; 641 + struct dm_kcopyd_client *kc = job->kc; 642 + 643 + job->read_err = read_err; 644 + job->write_err = write_err; 645 + 646 + push(&kc->complete_jobs, job); 647 + wake(kc); 648 + } 649 + EXPORT_SYMBOL(dm_kcopyd_do_callback); 650 + 620 651 /* 621 652 * Cancels a kcopyd job, eg. someone might be deactivating a 622 653 * mirror.
+57 -3
drivers/md/dm-snap.c
··· 170 170 * kcopyd. 171 171 */ 172 172 int started; 173 + 174 + /* 175 + * For writing a complete chunk, bypassing the copy. 176 + */ 177 + struct bio *full_bio; 178 + bio_end_io_t *full_bio_end_io; 179 + void *full_bio_private; 173 180 }; 174 181 175 182 /* ··· 1376 1369 struct dm_snapshot *s = pe->snap; 1377 1370 struct bio *origin_bios = NULL; 1378 1371 struct bio *snapshot_bios = NULL; 1372 + struct bio *full_bio = NULL; 1379 1373 int error = 0; 1380 1374 1381 1375 if (!success) { ··· 1416 1408 dm_remove_exception(&pe->e); 1417 1409 snapshot_bios = bio_list_get(&pe->snapshot_bios); 1418 1410 origin_bios = bio_list_get(&pe->origin_bios); 1411 + full_bio = pe->full_bio; 1412 + if (full_bio) { 1413 + full_bio->bi_end_io = pe->full_bio_end_io; 1414 + full_bio->bi_private = pe->full_bio_private; 1415 + } 1419 1416 free_pending_exception(pe); 1420 1417 1421 1418 increment_pending_exceptions_done_count(); ··· 1428 1415 up_write(&s->lock); 1429 1416 1430 1417 /* Submit any pending write bios */ 1431 - if (error) 1418 + if (error) { 1419 + if (full_bio) 1420 + bio_io_error(full_bio); 1432 1421 error_bios(snapshot_bios); 1433 - else 1422 + } else { 1423 + if (full_bio) 1424 + bio_endio(full_bio, 0); 1434 1425 flush_bios(snapshot_bios); 1426 + } 1435 1427 1436 1428 retry_origin_bios(s, origin_bios); 1437 1429 } ··· 1490 1472 dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); 1491 1473 } 1492 1474 1475 + static void full_bio_end_io(struct bio *bio, int error) 1476 + { 1477 + void *callback_data = bio->bi_private; 1478 + 1479 + dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); 1480 + } 1481 + 1482 + static void start_full_bio(struct dm_snap_pending_exception *pe, 1483 + struct bio *bio) 1484 + { 1485 + struct dm_snapshot *s = pe->snap; 1486 + void *callback_data; 1487 + 1488 + pe->full_bio = bio; 1489 + pe->full_bio_end_io = bio->bi_end_io; 1490 + pe->full_bio_private = bio->bi_private; 1491 + 1492 + callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, 1493 + copy_callback, pe); 1494 + 1495 + bio->bi_end_io = full_bio_end_io; 1496 + bio->bi_private = callback_data; 1497 + 1498 + generic_make_request(bio); 1499 + } 1500 + 1493 1501 static struct dm_snap_pending_exception * 1494 1502 __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 1495 1503 { ··· 1551 1507 bio_list_init(&pe->origin_bios); 1552 1508 bio_list_init(&pe->snapshot_bios); 1553 1509 pe->started = 0; 1510 + pe->full_bio = NULL; 1554 1511 1555 1512 if (s->store->type->prepare_exception(s->store, &pe->e)) { 1556 1513 free_pending_exception(pe); ··· 1645 1600 } 1646 1601 1647 1602 remap_exception(s, &pe->e, bio, chunk); 1648 - bio_list_add(&pe->snapshot_bios, bio); 1649 1603 1650 1604 r = DM_MAPIO_SUBMITTED; 1605 + 1606 + if (!pe->started && 1607 + bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { 1608 + pe->started = 1; 1609 + up_write(&s->lock); 1610 + start_full_bio(pe, bio); 1611 + goto out; 1612 + } 1613 + 1614 + bio_list_add(&pe->snapshot_bios, bio); 1651 1615 1652 1616 if (!pe->started) { 1653 1617 /* this is protected by snap->lock */
+15
include/linux/dm-kcopyd.h
··· 42 42 unsigned num_dests, struct dm_io_region *dests, 43 43 unsigned flags, dm_kcopyd_notify_fn fn, void *context); 44 44 45 + /* 46 + * Prepare a callback and submit it via the kcopyd thread. 47 + * 48 + * dm_kcopyd_prepare_callback allocates a callback structure and returns it. 49 + * It must not be called from interrupt context. 50 + * The returned value should be passed into dm_kcopyd_do_callback. 51 + * 52 + * dm_kcopyd_do_callback submits the callback. 53 + * It may be called from interrupt context. 54 + * The callback is issued from the kcopyd thread. 55 + */ 56 + void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, 57 + dm_kcopyd_notify_fn fn, void *context); 58 + void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); 59 + 45 60 #endif /* __KERNEL__ */ 46 61 #endif /* _LINUX_DM_KCOPYD_H */