Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm kcopyd: add WRITE SAME support to dm_kcopyd_zero

Add WRITE SAME support to dm-io and make it accessible to
dm_kcopyd_zero(). dm_kcopyd_zero() provides an asynchronous interface
whereas the blkdev_issue_write_same() interface is synchronous.

WRITE SAME is a SCSI command that can be leveraged for more efficient
zeroing of a specified logical extent of a device which supports it.
Only a single zeroed logical block is transferred to the target for each
WRITE SAME and the target then writes that same block across the
specified extent.

The dm thin target uses this.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

authored by

Mike Snitzer and committed by
Alasdair G Kergon
70d6c400 4f0b70b0

+33 -10
+18 -5
drivers/md/dm-io.c
··· 287 287 unsigned num_bvecs; 288 288 sector_t remaining = where->count; 289 289 struct request_queue *q = bdev_get_queue(where->bdev); 290 - sector_t discard_sectors; 290 + unsigned short logical_block_size = queue_logical_block_size(q); 291 + sector_t num_sectors; 291 292 292 293 /* 293 294 * where->count may be zero if rw holds a flush and we need to ··· 298 297 /* 299 298 * Allocate a suitably sized-bio. 300 299 */ 301 - if (rw & REQ_DISCARD) 300 + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) 302 301 num_bvecs = 1; 303 302 else 304 303 num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), ··· 311 310 store_io_and_region_in_bio(bio, io, region); 312 311 313 312 if (rw & REQ_DISCARD) { 314 - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); 315 - bio->bi_size = discard_sectors << SECTOR_SHIFT; 316 - remaining -= discard_sectors; 313 + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); 314 + bio->bi_size = num_sectors << SECTOR_SHIFT; 315 + remaining -= num_sectors; 316 + } else if (rw & REQ_WRITE_SAME) { 317 + /* 318 + * WRITE SAME only uses a single page. 319 + */ 320 + dp->get_page(dp, &page, &len, &offset); 321 + bio_add_page(bio, page, logical_block_size, offset); 322 + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); 323 + bio->bi_size = num_sectors << SECTOR_SHIFT; 324 + 325 + offset = 0; 326 + remaining -= num_sectors; 327 + dp->next_page(dp); 317 328 } else while (remaining) { 318 329 /* 319 330 * Try and add as many pages as possible.
+14 -4
drivers/md/dm-kcopyd.c
··· 349 349 struct dm_kcopyd_client *kc = job->kc; 350 350 351 351 if (error) { 352 - if (job->rw == WRITE) 352 + if (job->rw & WRITE) 353 353 job->write_err |= error; 354 354 else 355 355 job->read_err = 1; ··· 361 361 } 362 362 } 363 363 364 - if (job->rw == WRITE) 364 + if (job->rw & WRITE) 365 365 push(&kc->complete_jobs, job); 366 366 367 367 else { ··· 432 432 433 433 if (r < 0) { 434 434 /* error this rogue job */ 435 - if (job->rw == WRITE) 435 + if (job->rw & WRITE) 436 436 job->write_err = (unsigned long) -1L; 437 437 else 438 438 job->read_err = 1; ··· 585 585 unsigned int flags, dm_kcopyd_notify_fn fn, void *context) 586 586 { 587 587 struct kcopyd_job *job; 588 + int i; 588 589 589 590 /* 590 591 * Allocate an array of jobs consisting of one master job ··· 612 611 memset(&job->source, 0, sizeof job->source); 613 612 job->source.count = job->dests[0].count; 614 613 job->pages = &zero_page_list; 615 - job->rw = WRITE; 614 + 615 + /* 616 + * Use WRITE SAME to optimize zeroing if all dests support it. 617 + */ 618 + job->rw = WRITE | REQ_WRITE_SAME; 619 + for (i = 0; i < job->num_dests; i++) 620 + if (!bdev_write_same(job->dests[i].bdev)) { 621 + job->rw = WRITE; 622 + break; 623 + } 616 624 } 617 625 618 626 job->fn = fn;
+1 -1
drivers/md/dm-thin.c
··· 2779 2779 2780 2780 static struct target_type thin_target = { 2781 2781 .name = "thin", 2782 - .version = {1, 5, 0}, 2782 + .version = {1, 6, 0}, 2783 2783 .module = THIS_MODULE, 2784 2784 .ctr = thin_ctr, 2785 2785 .dtr = thin_dtr,