Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] sd: Logical Block Provisioning update

SBC3r26 contains many changes to the Logical Block Provisioning
interfaces (formerly known as the Thin Provisioning interfaces). This patch
implements support for both the old and new schemes using the same
heuristic as before (whether the LBP VPD page is present).

The new code also allows the provisioning mode (i.e. choice of command)
to be overridden on a per-device basis via sysfs. Two additional modes
are supported in this version:

- WRITE SAME(10) with the UNMAP bit set

- WRITE SAME(10) without the UNMAP bit set. This allows us to support
devices that predate the TP/LBP enhancements in SBC3 and which work
by way of zero-detection

Switching between modes has been consolidated in a helper function that
also updates the block layer topology according to the limitations of
the chosen command.

I experimented with trying WRITE SAME(16) if UNMAP fails, WRITE SAME(10)
if WRITE SAME(16) fails, etc. but found several devices that got
cranky. So for now we'll disable discard if one of the commands
fails. The user still has the option of selecting a different mode in
sysfs.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>

authored by

Martin K. Petersen and committed by
James Bottomley
c98a0eb0 72f7d322

+204 -59
+7
drivers/scsi/scsi_lib.c
··· 867 867 description = "Host Data Integrity Failure"; 868 868 action = ACTION_FAIL; 869 869 error = -EILSEQ; 870 + /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ 871 + } else if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) && 872 + (cmd->cmnd[0] == UNMAP || 873 + cmd->cmnd[0] == WRITE_SAME_16 || 874 + cmd->cmnd[0] == WRITE_SAME)) { 875 + description = "Discard failure"; 876 + action = ACTION_FAIL; 870 877 } else 871 878 action = ACTION_FAIL; 872 879 break;
+177 -54
drivers/scsi/sd.c
··· 96 96 #define SD_MINORS 0 97 97 #endif 98 98 99 + static void sd_config_discard(struct scsi_disk *, unsigned int); 99 100 static int sd_revalidate_disk(struct gendisk *); 100 101 static void sd_unlock_native_capacity(struct gendisk *disk); 101 102 static int sd_probe(struct device *); ··· 295 294 { 296 295 struct scsi_disk *sdkp = to_scsi_disk(dev); 297 296 298 - return snprintf(buf, 20, "%u\n", sdkp->thin_provisioning); 297 + return snprintf(buf, 20, "%u\n", sdkp->lbpme); 298 + } 299 + 300 + static const char *lbp_mode[] = { 301 + [SD_LBP_FULL] = "full", 302 + [SD_LBP_UNMAP] = "unmap", 303 + [SD_LBP_WS16] = "writesame_16", 304 + [SD_LBP_WS10] = "writesame_10", 305 + [SD_LBP_ZERO] = "writesame_zero", 306 + [SD_LBP_DISABLE] = "disabled", 307 + }; 308 + 309 + static ssize_t 310 + sd_show_provisioning_mode(struct device *dev, struct device_attribute *attr, 311 + char *buf) 312 + { 313 + struct scsi_disk *sdkp = to_scsi_disk(dev); 314 + 315 + return snprintf(buf, 20, "%s\n", lbp_mode[sdkp->provisioning_mode]); 316 + } 317 + 318 + static ssize_t 319 + sd_store_provisioning_mode(struct device *dev, struct device_attribute *attr, 320 + const char *buf, size_t count) 321 + { 322 + struct scsi_disk *sdkp = to_scsi_disk(dev); 323 + struct scsi_device *sdp = sdkp->device; 324 + 325 + if (!capable(CAP_SYS_ADMIN)) 326 + return -EACCES; 327 + 328 + if (sdp->type != TYPE_DISK) 329 + return -EINVAL; 330 + 331 + if (!strncmp(buf, lbp_mode[SD_LBP_UNMAP], 20)) 332 + sd_config_discard(sdkp, SD_LBP_UNMAP); 333 + else if (!strncmp(buf, lbp_mode[SD_LBP_WS16], 20)) 334 + sd_config_discard(sdkp, SD_LBP_WS16); 335 + else if (!strncmp(buf, lbp_mode[SD_LBP_WS10], 20)) 336 + sd_config_discard(sdkp, SD_LBP_WS10); 337 + else if (!strncmp(buf, lbp_mode[SD_LBP_ZERO], 20)) 338 + sd_config_discard(sdkp, SD_LBP_ZERO); 339 + else if (!strncmp(buf, lbp_mode[SD_LBP_DISABLE], 20)) 340 + sd_config_discard(sdkp, SD_LBP_DISABLE); 341 + else 342 + return -EINVAL; 343 + 344 + return count; 299 345 } 300 
346 301 347 static struct device_attribute sd_disk_attrs[] = { ··· 357 309 __ATTR(protection_mode, S_IRUGO, sd_show_protection_mode, NULL), 358 310 __ATTR(app_tag_own, S_IRUGO, sd_show_app_tag_own, NULL), 359 311 __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL), 312 + __ATTR(provisioning_mode, S_IRUGO|S_IWUSR, sd_show_provisioning_mode, 313 + sd_store_provisioning_mode), 360 314 __ATTR_NULL, 361 315 }; 362 316 ··· 483 433 scsi_set_prot_type(scmd, dif); 484 434 } 485 435 436 + static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) 437 + { 438 + struct request_queue *q = sdkp->disk->queue; 439 + unsigned int logical_block_size = sdkp->device->sector_size; 440 + unsigned int max_blocks = 0; 441 + 442 + q->limits.discard_zeroes_data = sdkp->lbprz; 443 + q->limits.discard_alignment = sdkp->unmap_alignment; 444 + q->limits.discard_granularity = 445 + max(sdkp->physical_block_size, 446 + sdkp->unmap_granularity * logical_block_size); 447 + 448 + switch (mode) { 449 + 450 + case SD_LBP_DISABLE: 451 + q->limits.max_discard_sectors = 0; 452 + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 453 + return; 454 + 455 + case SD_LBP_UNMAP: 456 + max_blocks = min_not_zero(sdkp->max_unmap_blocks, 0xffffffff); 457 + break; 458 + 459 + case SD_LBP_WS16: 460 + max_blocks = min_not_zero(sdkp->max_ws_blocks, 0xffffffff); 461 + break; 462 + 463 + case SD_LBP_WS10: 464 + max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff); 465 + break; 466 + 467 + case SD_LBP_ZERO: 468 + max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff); 469 + q->limits.discard_zeroes_data = 1; 470 + break; 471 + } 472 + 473 + q->limits.max_discard_sectors = max_blocks * (logical_block_size >> 9); 474 + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 475 + 476 + sdkp->provisioning_mode = mode; 477 + } 478 + 486 479 /** 487 480 * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device 488 481 * @sdp: scsi device to operate one ··· 542 449 unsigned int 
nr_sectors = bio_sectors(bio); 543 450 unsigned int len; 544 451 int ret; 452 + char *buf; 545 453 struct page *page; 546 454 547 455 if (sdkp->device->sector_size == 4096) { ··· 558 464 if (!page) 559 465 return BLKPREP_DEFER; 560 466 561 - if (sdkp->unmap) { 562 - char *buf = page_address(page); 467 + switch (sdkp->provisioning_mode) { 468 + case SD_LBP_UNMAP: 469 + buf = page_address(page); 563 470 564 471 rq->cmd_len = 10; 565 472 rq->cmd[0] = UNMAP; ··· 572 477 put_unaligned_be32(nr_sectors, &buf[16]); 573 478 574 479 len = 24; 575 - } else { 480 + break; 481 + 482 + case SD_LBP_WS16: 576 483 rq->cmd_len = 16; 577 484 rq->cmd[0] = WRITE_SAME_16; 578 485 rq->cmd[1] = 0x8; /* UNMAP */ ··· 582 485 put_unaligned_be32(nr_sectors, &rq->cmd[10]); 583 486 584 487 len = sdkp->device->sector_size; 488 + break; 489 + 490 + case SD_LBP_WS10: 491 + case SD_LBP_ZERO: 492 + rq->cmd_len = 10; 493 + rq->cmd[0] = WRITE_SAME; 494 + if (sdkp->provisioning_mode == SD_LBP_WS10) 495 + rq->cmd[1] = 0x8; /* UNMAP */ 496 + put_unaligned_be32(sector, &rq->cmd[2]); 497 + put_unaligned_be16(nr_sectors, &rq->cmd[7]); 498 + 499 + len = sdkp->device->sector_size; 500 + break; 501 + 502 + default: 503 + goto out; 585 504 } 586 505 587 506 blk_add_request_payload(rq, page, len); 588 507 ret = scsi_setup_blk_pc_cmnd(sdp, rq); 589 508 rq->buffer = page_address(page); 509 + 510 + out: 590 511 if (ret != BLKPREP_OK) { 591 512 __free_page(page); 592 513 rq->buffer = NULL; ··· 1366 1251 struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); 1367 1252 int sense_valid = 0; 1368 1253 int sense_deferred = 0; 1254 + unsigned char op = SCpnt->cmnd[0]; 1369 1255 1370 - if (SCpnt->request->cmd_flags & REQ_DISCARD) { 1371 - if (!result) 1372 - scsi_set_resid(SCpnt, 0); 1373 - return good_bytes; 1374 - } 1256 + if ((SCpnt->request->cmd_flags & REQ_DISCARD) && !result) 1257 + scsi_set_resid(SCpnt, 0); 1375 1258 1376 1259 if (result) { 1377 1260 sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); 
··· 1408 1295 SCpnt->result = 0; 1409 1296 memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); 1410 1297 break; 1411 - case ABORTED_COMMAND: /* DIF: Target detected corruption */ 1412 - case ILLEGAL_REQUEST: /* DIX: Host detected corruption */ 1413 - if (sshdr.asc == 0x10) 1298 + case ABORTED_COMMAND: 1299 + if (sshdr.asc == 0x10) /* DIF: Target detected corruption */ 1414 1300 good_bytes = sd_completed_bytes(SCpnt); 1301 + break; 1302 + case ILLEGAL_REQUEST: 1303 + if (sshdr.asc == 0x10) /* DIX: Host detected corruption */ 1304 + good_bytes = sd_completed_bytes(SCpnt); 1305 + /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ 1306 + if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) && 1307 + (op == UNMAP || op == WRITE_SAME_16 || op == WRITE_SAME)) 1308 + sd_config_discard(sdkp, SD_LBP_DISABLE); 1415 1309 break; 1416 1310 default: 1417 1311 break; ··· 1716 1596 sd_printk(KERN_NOTICE, sdkp, 1717 1597 "physical block alignment offset: %u\n", alignment); 1718 1598 1719 - if (buffer[14] & 0x80) { /* TPE */ 1720 - struct request_queue *q = sdp->request_queue; 1599 + if (buffer[14] & 0x80) { /* LBPME */ 1600 + sdkp->lbpme = 1; 1721 1601 1722 - sdkp->thin_provisioning = 1; 1723 - q->limits.discard_granularity = sdkp->physical_block_size; 1724 - q->limits.max_discard_sectors = 0xffffffff; 1602 + if (buffer[14] & 0x40) /* LBPRZ */ 1603 + sdkp->lbprz = 1; 1725 1604 1726 - if (buffer[14] & 0x40) /* TPRZ */ 1727 - q->limits.discard_zeroes_data = 1; 1728 - 1729 - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 1605 + sd_config_discard(sdkp, SD_LBP_WS16); 1730 1606 } 1731 1607 1732 1608 sdkp->capacity = lba + 1; ··· 2207 2091 */ 2208 2092 static void sd_read_block_limits(struct scsi_disk *sdkp) 2209 2093 { 2210 - struct request_queue *q = sdkp->disk->queue; 2211 2094 unsigned int sector_sz = sdkp->device->sector_size; 2212 2095 const int vpd_len = 64; 2213 2096 unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL); ··· 2221 2106 blk_queue_io_opt(sdkp->disk->queue, 2222 2107 
get_unaligned_be32(&buffer[12]) * sector_sz); 2223 2108 2224 - /* Thin provisioning enabled and page length indicates TP support */ 2225 - if (sdkp->thin_provisioning && buffer[3] == 0x3c) { 2226 - unsigned int lba_count, desc_count, granularity; 2109 + if (buffer[3] == 0x3c) { 2110 + unsigned int lba_count, desc_count; 2111 + 2112 + sdkp->max_ws_blocks = 2113 + (u32) min_not_zero(get_unaligned_be64(&buffer[36]), 2114 + (u64)0xffffffff); 2115 + 2116 + if (!sdkp->lbpme) 2117 + goto out; 2227 2118 2228 2119 lba_count = get_unaligned_be32(&buffer[20]); 2229 2120 desc_count = get_unaligned_be32(&buffer[24]); 2230 2121 2231 - if (lba_count && desc_count) { 2232 - if (sdkp->tpvpd && !sdkp->tpu) 2233 - sdkp->unmap = 0; 2234 - else 2235 - sdkp->unmap = 1; 2236 - } 2122 + if (lba_count && desc_count) 2123 + sdkp->max_unmap_blocks = lba_count; 2237 2124 2238 - if (sdkp->tpvpd && !sdkp->tpu && !sdkp->tpws) { 2239 - sd_printk(KERN_ERR, sdkp, "Thin provisioning is " \ 2240 - "enabled but neither TPU, nor TPWS are " \ 2241 - "set. 
Disabling discard!\n"); 2242 - goto out; 2243 - } 2244 - 2245 - if (lba_count) 2246 - q->limits.max_discard_sectors = 2247 - lba_count * sector_sz >> 9; 2248 - 2249 - granularity = get_unaligned_be32(&buffer[28]); 2250 - 2251 - if (granularity) 2252 - q->limits.discard_granularity = granularity * sector_sz; 2125 + sdkp->unmap_granularity = get_unaligned_be32(&buffer[28]); 2253 2126 2254 2127 if (buffer[32] & 0x80) 2255 - q->limits.discard_alignment = 2128 + sdkp->unmap_alignment = 2256 2129 get_unaligned_be32(&buffer[32]) & ~(1 << 31); 2130 + 2131 + if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ 2132 + 2133 + if (sdkp->max_unmap_blocks) 2134 + sd_config_discard(sdkp, SD_LBP_UNMAP); 2135 + else 2136 + sd_config_discard(sdkp, SD_LBP_WS16); 2137 + 2138 + } else { /* LBP VPD page tells us what to use */ 2139 + 2140 + if (sdkp->lbpu && sdkp->max_unmap_blocks) 2141 + sd_config_discard(sdkp, SD_LBP_UNMAP); 2142 + else if (sdkp->lbpws) 2143 + sd_config_discard(sdkp, SD_LBP_WS16); 2144 + else if (sdkp->lbpws10) 2145 + sd_config_discard(sdkp, SD_LBP_WS10); 2146 + else 2147 + sd_config_discard(sdkp, SD_LBP_DISABLE); 2148 + } 2257 2149 } 2258 2150 2259 2151 out: ··· 2294 2172 } 2295 2173 2296 2174 /** 2297 - * sd_read_thin_provisioning - Query thin provisioning VPD page 2175 + * sd_read_block_provisioning - Query provisioning VPD page 2298 2176 * @disk: disk to query 2299 2177 */ 2300 - static void sd_read_thin_provisioning(struct scsi_disk *sdkp) 2178 + static void sd_read_block_provisioning(struct scsi_disk *sdkp) 2301 2179 { 2302 2180 unsigned char *buffer; 2303 2181 const int vpd_len = 8; 2304 2182 2305 - if (sdkp->thin_provisioning == 0) 2183 + if (sdkp->lbpme == 0) 2306 2184 return; 2307 2185 2308 2186 buffer = kmalloc(vpd_len, GFP_KERNEL); ··· 2310 2188 if (!buffer || scsi_get_vpd_page(sdkp->device, 0xb2, buffer, vpd_len)) 2311 2189 goto out; 2312 2190 2313 - sdkp->tpvpd = 1; 2314 - sdkp->tpu = (buffer[5] >> 7) & 1; /* UNMAP */ 2315 - sdkp->tpws = (buffer[5] >> 6) 
& 1; /* WRITE SAME(16) with UNMAP */ 2191 + sdkp->lbpvpd = 1; 2192 + sdkp->lbpu = (buffer[5] >> 7) & 1; /* UNMAP */ 2193 + sdkp->lbpws = (buffer[5] >> 6) & 1; /* WRITE SAME(16) with UNMAP */ 2194 + sdkp->lbpws10 = (buffer[5] >> 5) & 1; /* WRITE SAME(10) with UNMAP */ 2316 2195 2317 2196 out: 2318 2197 kfree(buffer); ··· 2370 2247 sd_read_capacity(sdkp, buffer); 2371 2248 2372 2249 if (sd_try_extended_inquiry(sdp)) { 2373 - sd_read_thin_provisioning(sdkp); 2250 + sd_read_block_provisioning(sdkp); 2374 2251 sd_read_block_limits(sdkp); 2375 2252 sd_read_block_characteristics(sdkp); 2376 2253 }
+20 -5
drivers/scsi/sd.h
··· 43 43 SD_MEMPOOL_SIZE = 2, /* CDB pool size */ 44 44 }; 45 45 46 + enum { 47 + SD_LBP_FULL = 0, /* Full logical block provisioning */ 48 + SD_LBP_UNMAP, /* Use UNMAP command */ 49 + SD_LBP_WS16, /* Use WRITE SAME(16) with UNMAP bit */ 50 + SD_LBP_WS10, /* Use WRITE SAME(10) with UNMAP bit */ 51 + SD_LBP_ZERO, /* Use WRITE SAME(10) with zero payload */ 52 + SD_LBP_DISABLE, /* Discard disabled due to failed cmd */ 53 + }; 54 + 46 55 struct scsi_disk { 47 56 struct scsi_driver *driver; /* always &sd_template */ 48 57 struct scsi_device *device; ··· 59 50 struct gendisk *disk; 60 51 atomic_t openers; 61 52 sector_t capacity; /* size in 512-byte sectors */ 53 + u32 max_ws_blocks; 54 + u32 max_unmap_blocks; 55 + u32 unmap_granularity; 56 + u32 unmap_alignment; 62 57 u32 index; 63 58 unsigned int physical_block_size; 64 59 u8 media_present; 65 60 u8 write_prot; 66 61 u8 protection_type;/* Data Integrity Field */ 62 + u8 provisioning_mode; 67 63 unsigned ATO : 1; /* state of disk ATO bit */ 68 64 unsigned WCE : 1; /* state of disk WCE bit */ 69 65 unsigned RCD : 1; /* state of disk RCD bit, unused */ 70 66 unsigned DPOFUA : 1; /* state of disk DPOFUA bit */ 71 67 unsigned first_scan : 1; 72 - unsigned thin_provisioning : 1; 73 - unsigned unmap : 1; 74 - unsigned tpws : 1; 75 - unsigned tpu : 1; 76 - unsigned tpvpd : 1; 68 + unsigned lbpme : 1; 69 + unsigned lbprz : 1; 70 + unsigned lbpu : 1; 71 + unsigned lbpws : 1; 72 + unsigned lbpws10 : 1; 73 + unsigned lbpvpd : 1; 77 74 }; 78 75 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) 79 76