Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] sd: Implement support for WRITE SAME

Implement support for WRITE SAME(10) and WRITE SAME(16) in the SCSI disk
driver.

- We set the default maximum to 0xFFFF because there are several
devices out there that only support two-byte block counts even with
WRITE SAME(16). We only enable transfers bigger than 0xFFFF if the
device explicitly reports MAXIMUM WRITE SAME LENGTH in the BLOCK
LIMITS VPD.

- max_write_same_blocks can be overridden on a per-device basis in sysfs.

- The UNMAP discovery heuristics remain unchanged but the discard
limits are tweaked to match the "real" WRITE SAME commands.

- In the error handling logic we now distinguish between WRITE SAME
with and without UNMAP set.

The discovery process heuristics are:

- If the device reports a SCSI level of SPC-3 or greater we'll issue
READ SUPPORTED OPERATION CODES to find out whether WRITE SAME(16) is
supported. If that's the case we will use it.

- If the device supports the block limits VPD and reports a MAXIMUM
WRITE SAME LENGTH bigger than 0xFFFF we will use WRITE SAME(16).

- Otherwise we will use WRITE SAME(10) unless the target LBA is beyond
0xFFFFFFFF or the block count exceeds 0xFFFF.

- no_write_same is set for ATA, FireWire and USB.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>

authored by

Martin K. Petersen and committed by
James Bottomley
5db44863 26e85fcd

+191 -16
+1
drivers/ata/libata-scsi.c
··· 1053 1053 sdev->use_10_for_rw = 1; 1054 1054 sdev->use_10_for_ms = 1; 1055 1055 sdev->no_report_opcodes = 1; 1056 + sdev->no_write_same = 1; 1056 1057 1057 1058 /* Schedule policy is determined by ->qc_defer() callback and 1058 1059 * it needs to see every deferred qc. Set dev_blocked to 1 to
+1
drivers/firewire/sbp2.c
··· 1547 1547 1548 1548 sdev->use_10_for_rw = 1; 1549 1549 sdev->no_report_opcodes = 1; 1550 + sdev->no_write_same = 1; 1550 1551 1551 1552 if (sbp2_param_exclusive_login) 1552 1553 sdev->manage_start_stop = 1;
+17 -5
drivers/scsi/scsi_lib.c
··· 900 900 action = ACTION_FAIL; 901 901 error = -EILSEQ; 902 902 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ 903 - } else if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) && 904 - (cmd->cmnd[0] == UNMAP || 905 - cmd->cmnd[0] == WRITE_SAME_16 || 906 - cmd->cmnd[0] == WRITE_SAME)) { 907 - description = "Discard failure"; 903 + } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { 904 + switch (cmd->cmnd[0]) { 905 + case UNMAP: 906 + description = "Discard failure"; 907 + break; 908 + case WRITE_SAME: 909 + case WRITE_SAME_16: 910 + if (cmd->cmnd[1] & 0x8) 911 + description = "Discard failure"; 912 + else 913 + description = 914 + "Write same failure"; 915 + break; 916 + default: 917 + description = "Invalid command failure"; 918 + break; 919 + } 908 920 action = ACTION_FAIL; 909 921 error = -EREMOTEIO; 910 922 } else
+161 -11
drivers/scsi/sd.c
··· 99 99 #endif 100 100 101 101 static void sd_config_discard(struct scsi_disk *, unsigned int); 102 + static void sd_config_write_same(struct scsi_disk *); 102 103 static int sd_revalidate_disk(struct gendisk *); 103 104 static void sd_unlock_native_capacity(struct gendisk *disk); 104 105 static int sd_probe(struct device *); ··· 396 395 return err ? err : count; 397 396 } 398 397 398 + static ssize_t 399 + sd_show_write_same_blocks(struct device *dev, struct device_attribute *attr, 400 + char *buf) 401 + { 402 + struct scsi_disk *sdkp = to_scsi_disk(dev); 403 + 404 + return snprintf(buf, 20, "%u\n", sdkp->max_ws_blocks); 405 + } 406 + 407 + static ssize_t 408 + sd_store_write_same_blocks(struct device *dev, struct device_attribute *attr, 409 + const char *buf, size_t count) 410 + { 411 + struct scsi_disk *sdkp = to_scsi_disk(dev); 412 + struct scsi_device *sdp = sdkp->device; 413 + unsigned long max; 414 + int err; 415 + 416 + if (!capable(CAP_SYS_ADMIN)) 417 + return -EACCES; 418 + 419 + if (sdp->type != TYPE_DISK) 420 + return -EINVAL; 421 + 422 + err = kstrtoul(buf, 10, &max); 423 + 424 + if (err) 425 + return err; 426 + 427 + if (max == 0) 428 + sdp->no_write_same = 1; 429 + else if (max <= SD_MAX_WS16_BLOCKS) 430 + sdkp->max_ws_blocks = max; 431 + 432 + sd_config_write_same(sdkp); 433 + 434 + return count; 435 + } 436 + 399 437 static struct device_attribute sd_disk_attrs[] = { 400 438 __ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type, 401 439 sd_store_cache_type), ··· 450 410 __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL), 451 411 __ATTR(provisioning_mode, S_IRUGO|S_IWUSR, sd_show_provisioning_mode, 452 412 sd_store_provisioning_mode), 413 + __ATTR(max_write_same_blocks, S_IRUGO|S_IWUSR, 414 + sd_show_write_same_blocks, sd_store_write_same_blocks), 453 415 __ATTR(max_medium_access_timeouts, S_IRUGO|S_IWUSR, 454 416 sd_show_max_medium_access_timeouts, 455 417 sd_store_max_medium_access_timeouts), ··· 603 561 return; 604 562 605 563 
case SD_LBP_UNMAP: 606 - max_blocks = min_not_zero(sdkp->max_unmap_blocks, 0xffffffff); 564 + max_blocks = min_not_zero(sdkp->max_unmap_blocks, 565 + (u32)SD_MAX_WS16_BLOCKS); 607 566 break; 608 567 609 568 case SD_LBP_WS16: 610 - max_blocks = min_not_zero(sdkp->max_ws_blocks, 0xffffffff); 569 + max_blocks = min_not_zero(sdkp->max_ws_blocks, 570 + (u32)SD_MAX_WS16_BLOCKS); 611 571 break; 612 572 613 573 case SD_LBP_WS10: 614 - max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff); 574 + max_blocks = min_not_zero(sdkp->max_ws_blocks, 575 + (u32)SD_MAX_WS10_BLOCKS); 615 576 break; 616 577 617 578 case SD_LBP_ZERO: 618 - max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff); 579 + max_blocks = min_not_zero(sdkp->max_ws_blocks, 580 + (u32)SD_MAX_WS10_BLOCKS); 619 581 q->limits.discard_zeroes_data = 1; 620 582 break; 621 583 } ··· 713 667 return ret; 714 668 } 715 669 670 + static void sd_config_write_same(struct scsi_disk *sdkp) 671 + { 672 + struct request_queue *q = sdkp->disk->queue; 673 + unsigned int logical_block_size = sdkp->device->sector_size; 674 + unsigned int blocks = 0; 675 + 676 + if (sdkp->device->no_write_same) { 677 + sdkp->max_ws_blocks = 0; 678 + goto out; 679 + } 680 + 681 + /* Some devices can not handle block counts above 0xffff despite 682 + * supporting WRITE SAME(16). Consequently we default to 64k 683 + * blocks per I/O unless the device explicitly advertises a 684 + * bigger limit. 
685 + */ 686 + if (sdkp->max_ws_blocks == 0) 687 + sdkp->max_ws_blocks = SD_MAX_WS10_BLOCKS; 688 + 689 + if (sdkp->ws16 || sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) 690 + blocks = min_not_zero(sdkp->max_ws_blocks, 691 + (u32)SD_MAX_WS16_BLOCKS); 692 + else 693 + blocks = min_not_zero(sdkp->max_ws_blocks, 694 + (u32)SD_MAX_WS10_BLOCKS); 695 + 696 + out: 697 + blk_queue_max_write_same_sectors(q, blocks * (logical_block_size >> 9)); 698 + } 699 + 700 + /** 701 + * sd_setup_write_same_cmnd - write the same data to multiple blocks 702 + * @sdp: scsi device to operate one 703 + * @rq: Request to prepare 704 + * 705 + * Will issue either WRITE SAME(10) or WRITE SAME(16) depending on 706 + * preference indicated by target device. 707 + **/ 708 + static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq) 709 + { 710 + struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); 711 + struct bio *bio = rq->bio; 712 + sector_t sector = blk_rq_pos(rq); 713 + unsigned int nr_sectors = blk_rq_sectors(rq); 714 + unsigned int nr_bytes = blk_rq_bytes(rq); 715 + int ret; 716 + 717 + if (sdkp->device->no_write_same) 718 + return BLKPREP_KILL; 719 + 720 + BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size); 721 + 722 + sector >>= ilog2(sdp->sector_size) - 9; 723 + nr_sectors >>= ilog2(sdp->sector_size) - 9; 724 + 725 + rq->__data_len = sdp->sector_size; 726 + rq->timeout = SD_WRITE_SAME_TIMEOUT; 727 + memset(rq->cmd, 0, rq->cmd_len); 728 + 729 + if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) { 730 + rq->cmd_len = 16; 731 + rq->cmd[0] = WRITE_SAME_16; 732 + put_unaligned_be64(sector, &rq->cmd[2]); 733 + put_unaligned_be32(nr_sectors, &rq->cmd[10]); 734 + } else { 735 + rq->cmd_len = 10; 736 + rq->cmd[0] = WRITE_SAME; 737 + put_unaligned_be32(sector, &rq->cmd[2]); 738 + put_unaligned_be16(nr_sectors, &rq->cmd[7]); 739 + } 740 + 741 + ret = scsi_setup_blk_pc_cmnd(sdp, rq); 742 + rq->__data_len = nr_bytes; 743 + 744 + return ret; 745 + } 746 + 
716 747 static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) 717 748 { 718 749 rq->timeout = SD_FLUSH_TIMEOUT; ··· 834 711 */ 835 712 if (rq->cmd_flags & REQ_DISCARD) { 836 713 ret = sd_setup_discard_cmnd(sdp, rq); 714 + goto out; 715 + } else if (rq->cmd_flags & REQ_WRITE_SAME) { 716 + ret = sd_setup_write_same_cmnd(sdp, rq); 837 717 goto out; 838 718 } else if (rq->cmd_flags & REQ_FLUSH) { 839 719 ret = scsi_setup_flush_cmnd(sdp, rq); ··· 1610 1484 int sense_valid = 0; 1611 1485 int sense_deferred = 0; 1612 1486 unsigned char op = SCpnt->cmnd[0]; 1487 + unsigned char unmap = SCpnt->cmnd[1] & 8; 1613 1488 1614 - if (req->cmd_flags & REQ_DISCARD) { 1489 + if (req->cmd_flags & REQ_DISCARD || req->cmd_flags & REQ_WRITE_SAME) { 1615 1490 if (!result) { 1616 1491 good_bytes = blk_rq_bytes(req); 1617 1492 scsi_set_resid(SCpnt, 0); ··· 1669 1542 if (sshdr.asc == 0x10) /* DIX: Host detected corruption */ 1670 1543 good_bytes = sd_completed_bytes(SCpnt); 1671 1544 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ 1672 - if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) && 1673 - (op == UNMAP || op == WRITE_SAME_16 || op == WRITE_SAME)) 1674 - sd_config_discard(sdkp, SD_LBP_DISABLE); 1545 + if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { 1546 + switch (op) { 1547 + case UNMAP: 1548 + sd_config_discard(sdkp, SD_LBP_DISABLE); 1549 + break; 1550 + case WRITE_SAME_16: 1551 + case WRITE_SAME: 1552 + if (unmap) 1553 + sd_config_discard(sdkp, SD_LBP_DISABLE); 1554 + else { 1555 + sdkp->device->no_write_same = 1; 1556 + sd_config_write_same(sdkp); 1557 + 1558 + good_bytes = 0; 1559 + req->__data_len = blk_rq_bytes(req); 1560 + req->cmd_flags |= REQ_QUIET; 1561 + } 1562 + } 1563 + } 1675 1564 break; 1676 1565 default: 1677 1566 break; ··· 2523 2380 if (buffer[3] == 0x3c) { 2524 2381 unsigned int lba_count, desc_count; 2525 2382 2526 - sdkp->max_ws_blocks = 2527 - (u32) min_not_zero(get_unaligned_be64(&buffer[36]), 2528 - (u64)0xffffffff); 2383 + 
sdkp->max_ws_blocks = (u32)get_unaligned_be64(&buffer[36]); 2529 2384 2530 2385 if (!sdkp->lbpme) 2531 2386 goto out; ··· 2616 2475 kfree(buffer); 2617 2476 } 2618 2477 2478 + static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) 2479 + { 2480 + if (scsi_report_opcode(sdkp->device, buffer, SD_BUF_SIZE, 2481 + WRITE_SAME_16)) 2482 + sdkp->ws16 = 1; 2483 + } 2484 + 2619 2485 static int sd_try_extended_inquiry(struct scsi_device *sdp) 2620 2486 { 2621 2487 /* ··· 2682 2534 sd_read_write_protect_flag(sdkp, buffer); 2683 2535 sd_read_cache_type(sdkp, buffer); 2684 2536 sd_read_app_tag_own(sdkp, buffer); 2537 + sd_read_write_same(sdkp, buffer); 2685 2538 } 2686 2539 2687 2540 sdkp->first_scan = 0; ··· 2700 2551 blk_queue_flush(sdkp->disk->queue, flush); 2701 2552 2702 2553 set_capacity(disk, sdkp->capacity); 2554 + sd_config_write_same(sdkp); 2703 2555 kfree(buffer); 2704 2556 2705 2557 out:
+7
drivers/scsi/sd.h
··· 14 14 #define SD_TIMEOUT (30 * HZ) 15 15 #define SD_MOD_TIMEOUT (75 * HZ) 16 16 #define SD_FLUSH_TIMEOUT (60 * HZ) 17 + #define SD_WRITE_SAME_TIMEOUT (120 * HZ) 17 18 18 19 /* 19 20 * Number of allowed retries ··· 37 36 enum { 38 37 SD_EXT_CDB_SIZE = 32, /* Extended CDB size */ 39 38 SD_MEMPOOL_SIZE = 2, /* CDB pool size */ 39 + }; 40 + 41 + enum { 42 + SD_MAX_WS10_BLOCKS = 0xffff, 43 + SD_MAX_WS16_BLOCKS = 0x7fffff, 40 44 }; 41 45 42 46 enum { ··· 83 77 unsigned lbpws : 1; 84 78 unsigned lbpws10 : 1; 85 79 unsigned lbpvpd : 1; 80 + unsigned ws16 : 1; 86 81 }; 87 82 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) 88 83
+3
drivers/usb/storage/scsiglue.c
··· 189 189 /* Do not attempt to use REPORT SUPPORTED OPERATION CODES */ 190 190 sdev->no_report_opcodes = 1; 191 191 192 + /* Do not attempt to use WRITE SAME */ 193 + sdev->no_write_same = 1; 194 + 192 195 /* Some disks return the total number of blocks in response 193 196 * to READ CAPACITY rather than the highest block number. 194 197 * If this device makes that mistake, tell the sd driver. */
+1
include/scsi/scsi_device.h
··· 136 136 unsigned use_10_for_rw:1; /* first try 10-byte read / write */ 137 137 unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ 138 138 unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ 139 + unsigned no_write_same:1; /* no WRITE SAME command */ 139 140 unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */ 140 141 unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */ 141 142 unsigned skip_vpd_pages:1; /* do not read VPD pages */