Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[libata] support for > 512 byte sectors (e.g. 4K Native)

This change enables my x86 machine to recognize and talk to a
"Native 4K" SATA device.

When I started working on this, I didn't know Matthew Wilcox had
posted a similar patch 2 years ago:
http://git.kernel.org/?p=linux/kernel/git/willy/ata.git;a=shortlog;h=refs/heads/ata-large-sectors

Gwendal Grignou pointed me at the above code and small portions of
this patch include Matthew's work. That's why Matthew is first on the
"Signed-off-by:". I've NOT included his use of a bitmap to determine
512 vs Native for ATA command block size - just used a simple table.
And bugs are almost certainly mine.

Lastly, the patch has been tested with a native 4K 'Engineering
Sample' drive provided by Hitachi GST.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Signed-off-by: Grant Grundler <grundler@google.com>
Reviewed-by: Gwendal Grignou <gwendal@google.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

authored by

Grant Grundler and committed by
Jeff Garzik
295124dc 1aadf5c3

+104 -36
+64 -30
drivers/ata/libata-scsi.c
··· 53 53 #include "libata.h" 54 54 #include "libata-transport.h" 55 55 56 - #define SECTOR_SIZE 512 57 56 #define ATA_SCSI_RBUF_SIZE 4096 58 57 59 58 static DEFINE_SPINLOCK(ata_scsi_rbuf_lock); ··· 502 503 memset(scsi_cmd, 0, sizeof(scsi_cmd)); 503 504 504 505 if (args[3]) { 505 - argsize = SECTOR_SIZE * args[3]; 506 + argsize = ATA_SECT_SIZE * args[3]; 506 507 argbuf = kmalloc(argsize, GFP_KERNEL); 507 508 if (argbuf == NULL) { 508 509 rc = -ENOMEM; ··· 1136 1137 blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); 1137 1138 } else { 1138 1139 /* ATA devices must be sector aligned */ 1140 + sdev->sector_size = ata_id_logical_sector_size(dev->id); 1139 1141 blk_queue_update_dma_alignment(sdev->request_queue, 1140 - ATA_SECT_SIZE - 1); 1142 + sdev->sector_size - 1); 1141 1143 sdev->manage_start_stop = 1; 1142 1144 } 1143 1145 ··· 1153 1153 scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); 1154 1154 } 1155 1155 1156 + dev->sdev = sdev; 1156 1157 return 0; 1157 1158 } 1158 1159 ··· 1684 1683 goto nothing_to_do; 1685 1684 1686 1685 qc->flags |= ATA_QCFLAG_IO; 1687 - qc->nbytes = n_block * ATA_SECT_SIZE; 1686 + qc->nbytes = n_block * scmd->device->sector_size; 1688 1687 1689 1688 rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags, 1690 1689 qc->tag); ··· 2111 2110 2112 2111 static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) 2113 2112 { 2114 - u32 min_io_sectors; 2113 + u16 min_io_sectors; 2115 2114 2116 2115 rbuf[1] = 0xb0; 2117 2116 rbuf[3] = 0x3c; /* required VPD size with unmap support */ ··· 2123 2122 * logical than physical sector size we need to figure out what the 2124 2123 * latter is. 
2125 2124 */ 2126 - if (ata_id_has_large_logical_sectors(args->id)) 2127 - min_io_sectors = ata_id_logical_per_physical_sectors(args->id); 2128 - else 2129 - min_io_sectors = 1; 2125 + min_io_sectors = 1 << ata_id_log2_per_physical_sector(args->id); 2130 2126 put_unaligned_be16(min_io_sectors, &rbuf[6]); 2131 2127 2132 2128 /* ··· 2382 2384 { 2383 2385 struct ata_device *dev = args->dev; 2384 2386 u64 last_lba = dev->n_sectors - 1; /* LBA of the last block */ 2385 - u8 log_per_phys = 0; 2386 - u16 lowest_aligned = 0; 2387 - u16 word_106 = dev->id[106]; 2388 - u16 word_209 = dev->id[209]; 2387 + u32 sector_size; /* physical sector size in bytes */ 2388 + u8 log2_per_phys; 2389 + u16 lowest_aligned; 2389 2390 2390 - if ((word_106 & 0xc000) == 0x4000) { 2391 - /* Number and offset of logical sectors per physical sector */ 2392 - if (word_106 & (1 << 13)) 2393 - log_per_phys = word_106 & 0xf; 2394 - if ((word_209 & 0xc000) == 0x4000) { 2395 - u16 first = dev->id[209] & 0x3fff; 2396 - if (first > 0) 2397 - lowest_aligned = (1 << log_per_phys) - first; 2398 - } 2399 - } 2391 + sector_size = ata_id_logical_sector_size(dev->id); 2392 + log2_per_phys = ata_id_log2_per_physical_sector(dev->id); 2393 + lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys); 2400 2394 2401 2395 VPRINTK("ENTER\n"); 2402 2396 ··· 2403 2413 rbuf[3] = last_lba; 2404 2414 2405 2415 /* sector size */ 2406 - rbuf[6] = ATA_SECT_SIZE >> 8; 2407 - rbuf[7] = ATA_SECT_SIZE & 0xff; 2416 + rbuf[4] = sector_size >> (8 * 3); 2417 + rbuf[5] = sector_size >> (8 * 2); 2418 + rbuf[6] = sector_size >> (8 * 1); 2419 + rbuf[7] = sector_size; 2408 2420 } else { 2409 2421 /* sector count, 64-bit */ 2410 2422 rbuf[0] = last_lba >> (8 * 7); ··· 2419 2427 rbuf[7] = last_lba; 2420 2428 2421 2429 /* sector size */ 2422 - rbuf[10] = ATA_SECT_SIZE >> 8; 2423 - rbuf[11] = ATA_SECT_SIZE & 0xff; 2430 + rbuf[ 8] = sector_size >> (8 * 3); 2431 + rbuf[ 9] = sector_size >> (8 * 2); 2432 + rbuf[10] = sector_size >> (8 
* 1); 2433 + rbuf[11] = sector_size; 2424 2434 2425 2435 rbuf[12] = 0; 2426 - rbuf[13] = log_per_phys; 2436 + rbuf[13] = log2_per_phys; 2427 2437 rbuf[14] = (lowest_aligned >> 8) & 0x3f; 2428 2438 rbuf[15] = lowest_aligned; 2429 2439 ··· 2869 2875 tf->device = dev->devno ? 2870 2876 tf->device | ATA_DEV1 : tf->device & ~ATA_DEV1; 2871 2877 2872 - /* READ/WRITE LONG use a non-standard sect_size */ 2873 - qc->sect_size = ATA_SECT_SIZE; 2874 2878 switch (tf->command) { 2879 + /* READ/WRITE LONG use a non-standard sect_size */ 2875 2880 case ATA_CMD_READ_LONG: 2876 2881 case ATA_CMD_READ_LONG_ONCE: 2877 2882 case ATA_CMD_WRITE_LONG: ··· 2878 2885 if (tf->protocol != ATA_PROT_PIO || tf->nsect != 1) 2879 2886 goto invalid_fld; 2880 2887 qc->sect_size = scsi_bufflen(scmd); 2888 + break; 2889 + 2890 + /* commands using reported Logical Block size (e.g. 512 or 4K) */ 2891 + case ATA_CMD_CFA_WRITE_NE: 2892 + case ATA_CMD_CFA_TRANS_SECT: 2893 + case ATA_CMD_CFA_WRITE_MULT_NE: 2894 + /* XXX: case ATA_CMD_CFA_WRITE_SECTORS_WITHOUT_ERASE: */ 2895 + case ATA_CMD_READ: 2896 + case ATA_CMD_READ_EXT: 2897 + case ATA_CMD_READ_QUEUED: 2898 + /* XXX: case ATA_CMD_READ_QUEUED_EXT: */ 2899 + case ATA_CMD_FPDMA_READ: 2900 + case ATA_CMD_READ_MULTI: 2901 + case ATA_CMD_READ_MULTI_EXT: 2902 + case ATA_CMD_PIO_READ: 2903 + case ATA_CMD_PIO_READ_EXT: 2904 + case ATA_CMD_READ_STREAM_DMA_EXT: 2905 + case ATA_CMD_READ_STREAM_EXT: 2906 + case ATA_CMD_VERIFY: 2907 + case ATA_CMD_VERIFY_EXT: 2908 + case ATA_CMD_WRITE: 2909 + case ATA_CMD_WRITE_EXT: 2910 + case ATA_CMD_WRITE_FUA_EXT: 2911 + case ATA_CMD_WRITE_QUEUED: 2912 + case ATA_CMD_WRITE_QUEUED_FUA_EXT: 2913 + case ATA_CMD_FPDMA_WRITE: 2914 + case ATA_CMD_WRITE_MULTI: 2915 + case ATA_CMD_WRITE_MULTI_EXT: 2916 + case ATA_CMD_WRITE_MULTI_FUA_EXT: 2917 + case ATA_CMD_PIO_WRITE: 2918 + case ATA_CMD_PIO_WRITE_EXT: 2919 + case ATA_CMD_WRITE_STREAM_DMA_EXT: 2920 + case ATA_CMD_WRITE_STREAM_EXT: 2921 + qc->sect_size = scmd->device->sector_size; 2922 + 
break; 2923 + 2924 + /* Everything else uses 512 byte "sectors" */ 2925 + default: 2926 + qc->sect_size = ATA_SECT_SIZE; 2881 2927 } 2882 2928 2883 2929 /* ··· 3412 3380 if (!IS_ERR(sdev)) { 3413 3381 dev->sdev = sdev; 3414 3382 scsi_device_put(sdev); 3383 + } else { 3384 + dev->sdev = NULL; 3415 3385 } 3416 3386 } 3417 3387 }
+40 -6
include/linux/ata.h
··· 89 89 ATA_ID_SPG = 98, 90 90 ATA_ID_LBA_CAPACITY_2 = 100, 91 91 ATA_ID_SECTOR_SIZE = 106, 92 + ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ 92 93 ATA_ID_LAST_LUN = 126, 93 94 ATA_ID_DLF = 128, 94 95 ATA_ID_CSFO = 129, ··· 641 640 return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; 642 641 } 643 642 644 - static inline int ata_id_has_large_logical_sectors(const u16 *id) 643 + static inline u32 ata_id_logical_sector_size(const u16 *id) 645 644 { 646 - if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000) 647 - return 0; 648 - return id[ATA_ID_SECTOR_SIZE] & (1 << 13); 645 + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. 646 + * IDENTIFY DEVICE data, word 117-118. 647 + * 0xd000 ignores bit 13 (logical:physical > 1) 648 + */ 649 + if ((id[ATA_ID_SECTOR_SIZE] & 0xd000) == 0x5000) 650 + return (((id[ATA_ID_LOGICAL_SECTOR_SIZE+1] << 16) 651 + + id[ATA_ID_LOGICAL_SECTOR_SIZE]) * sizeof(u16)) ; 652 + return ATA_SECT_SIZE; 649 653 } 650 654 651 - static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) 655 + static inline u8 ata_id_log2_per_physical_sector(const u16 *id) 652 656 { 653 - return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); 657 + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. 658 + * IDENTIFY DEVICE data, word 106. 659 + * 0xe000 ignores bit 12 (logical sector > 512 bytes) 660 + */ 661 + if ((id[ATA_ID_SECTOR_SIZE] & 0xe000) == 0x6000) 662 + return (id[ATA_ID_SECTOR_SIZE] & 0xf); 663 + return 0; 664 + } 665 + 666 + /* Offset of logical sectors relative to physical sectors. 667 + * 668 + * If device has more than one logical sector per physical sector 669 + * (aka 512 byte emulation), vendors might offset the "sector 0" address 670 + * so sector 63 is "naturally aligned" - e.g. FAT partition table. 671 + * This avoids Read/Mod/Write penalties when using FAT partition table 672 + * and updating "well aligned" (FS perspective) physical sectors on every 673 + * transaction. 
674 + */ 675 + static inline u16 ata_id_logical_sector_offset(const u16 *id, 676 + u8 log2_per_phys) 677 + { 678 + u16 word_209 = id[209]; 679 + 680 + if ((log2_per_phys > 1) && (word_209 & 0xc000) == 0x4000) { 681 + u16 first = word_209 & 0x3fff; 682 + if (first > 0) 683 + return (1 << log2_per_phys) - first; 684 + } 685 + return 0; 654 686 } 655 687 656 688 static inline int ata_id_has_lba48(const u16 *id)