Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm kcopyd: add sequential write feature

When copying blocks to host-managed zoned block devices, writes must be
sequential. However, dm_kcopyd_copy() does not guarantee this as writes
are issued in the completion order of reads, and reads may complete out
of order despite being issued sequentially.

Fix this by introducing the DM_KCOPYD_WRITE_SEQ feature flag. This can
be specified when calling dm_kcopyd_copy() and should be set
automatically if one of the destinations is a host-managed zoned block
device. For a split job, the master job maintains the write position at
which writes must be issued. This is checked with the pop() function
which is modified to not return any write I/O sub job that is not at the
correct write position.

When DM_KCOPYD_WRITE_SEQ is specified for a job, errors cannot be
ignored and the flag DM_KCOPYD_IGNORE_ERROR is ignored, even if
specified by the user.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

authored by

Damien Le Moal and committed by
Mike Snitzer
b73c67c2 0be12c1c

+64 -2
+63 -2
drivers/md/dm-kcopyd.c
··· 356 356 struct mutex lock; 357 357 atomic_t sub_jobs; 358 358 sector_t progress; 359 + sector_t write_offset; 359 360 360 361 struct kcopyd_job *master_job; 361 362 }; ··· 387 386 * Functions to push and pop a job onto the head of a given job 388 387 * list. 389 388 */ 389 + static struct kcopyd_job *pop_io_job(struct list_head *jobs, 390 + struct dm_kcopyd_client *kc) 391 + { 392 + struct kcopyd_job *job; 393 + 394 + /* 395 + * For I/O jobs, pop any read, any write without sequential write 396 + * constraint and sequential writes that are at the right position. 397 + */ 398 + list_for_each_entry(job, jobs, list) { 399 + if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) { 400 + list_del(&job->list); 401 + return job; 402 + } 403 + 404 + if (job->write_offset == job->master_job->write_offset) { 405 + job->master_job->write_offset += job->source.count; 406 + list_del(&job->list); 407 + return job; 408 + } 409 + } 410 + 411 + return NULL; 412 + } 413 + 390 414 static struct kcopyd_job *pop(struct list_head *jobs, 391 415 struct dm_kcopyd_client *kc) 392 416 { ··· 421 395 spin_lock_irqsave(&kc->job_lock, flags); 422 396 423 397 if (!list_empty(jobs)) { 424 - job = list_entry(jobs->next, struct kcopyd_job, list); 425 - list_del(&job->list); 398 + if (jobs == &kc->io_jobs) 399 + job = pop_io_job(jobs, kc); 400 + else { 401 + job = list_entry(jobs->next, struct kcopyd_job, list); 402 + list_del(&job->list); 403 + } 426 404 } 427 405 spin_unlock_irqrestore(&kc->job_lock, flags); 428 406 ··· 535 505 .notify.context = job, 536 506 .client = job->kc->io_client, 537 507 }; 508 + 509 + /* 510 + * If we need to write sequentially and some reads or writes failed, 511 + * no point in continuing. 
512 + */ 513 + if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && 514 + job->master_job->write_err) 515 + return -EIO; 538 516 539 517 io_job_start(job->kc->throttle); 540 518 ··· 693 655 int i; 694 656 695 657 *sub_job = *job; 658 + sub_job->write_offset = progress; 696 659 sub_job->source.sector += progress; 697 660 sub_job->source.count = count; 698 661 ··· 762 723 job->num_dests = num_dests; 763 724 memcpy(&job->dests, dests, sizeof(*dests) * num_dests); 764 725 726 + /* 727 + * If one of the destination is a host-managed zoned block device, 728 + * we need to write sequentially. If one of the destination is a 729 + * host-aware device, then leave it to the caller to choose what to do. 730 + */ 731 + if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) { 732 + for (i = 0; i < job->num_dests; i++) { 733 + if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { 734 + set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags); 735 + break; 736 + } 737 + } 738 + } 739 + 740 + /* 741 + * If we need to write sequentially, errors cannot be ignored. 742 + */ 743 + if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && 744 + test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) 745 + clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags); 746 + 765 747 if (from) { 766 748 job->source = *from; 767 749 job->pages = NULL; ··· 806 746 job->fn = fn; 807 747 job->context = context; 808 748 job->master_job = job; 749 + job->write_offset = 0; 809 750 810 751 if (job->source.count <= SUB_JOB_SIZE) 811 752 dispatch_job(job);
+1
include/linux/dm-kcopyd.h
··· 20 20 #define DM_KCOPYD_MAX_REGIONS 8 21 21 22 22 #define DM_KCOPYD_IGNORE_ERROR 1 23 + #define DM_KCOPYD_WRITE_SEQ 2 23 24 24 25 struct dm_kcopyd_throttle { 25 26 unsigned throttle;