Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.17-rc1.integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs 'protection info' updates from Christian Brauner:
"This adds the new FS_IOC_GETLBMD_CAP ioctl() to query metadata and
protection info (PI) capabilities. This ioctl returns information
about the file's integrity profile. This is useful for userspace
applications to understand a file's end-to-end data protection support
and configure the I/O accordingly.

For now this interface is only supported by block devices. However, the
design and placement of this ioctl in generic FS ioctl space allows us
to extend it to work over files as well. This may be useful when
filesystems start supporting PI-aware layouts.

A new structure struct logical_block_metadata_cap is introduced, which
contains the following fields:

- lbmd_flags:
bitmask of logical block metadata capability flags

- lbmd_interval:
the amount of data described by each unit of logical block metadata

- lbmd_size:
size in bytes of the logical block metadata associated with each
interval

- lbmd_opaque_size:
size in bytes of the opaque block tag associated with each interval

- lbmd_opaque_offset:
offset in bytes of the opaque block tag within the logical block
metadata

- lbmd_pi_size:
size in bytes of the T10 PI tuple associated with each interval

- lbmd_pi_offset:
offset in bytes of T10 PI tuple within the logical block metadata

- lbmd_guard_tag_type:
T10 PI guard tag type

- lbmd_app_tag_size:
size in bytes of the T10 PI application tag

- lbmd_ref_tag_size:
size in bytes of the T10 PI reference tag

- lbmd_storage_tag_size:
size in bytes of the T10 PI storage tag

The internal logic to fetch the capability is encapsulated in a helper
function blk_get_meta_cap(), which uses the blk_integrity profile
associated with the device. The ioctl returns -EOPNOTSUPP if
CONFIG_BLK_DEV_INTEGRITY is not enabled"

* tag 'vfs-6.17-rc1.integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
block: fix lbmd_guard_tag_type assignment in FS_IOC_GETLBMD_CAP
block: fix FS_IOC_GETLBMD_CAP parsing in blkdev_common_ioctl()
fs: add ioctl to query metadata and protection info capabilities
nvme: set pi_offset only when checksum type is not BLK_INTEGRITY_CSUM_NONE
block: introduce pi_tuple_size field in blk_integrity
block: rename tuple_size field in blk_integrity to metadata_size

+209 -31
+2 -2
block/bio-integrity-auto.c
··· 54 54 { 55 55 switch (bi->csum_type) { 56 56 case BLK_INTEGRITY_CSUM_CRC64: 57 - return bi->tuple_size == sizeof(struct crc64_pi_tuple); 57 + return bi->metadata_size == sizeof(struct crc64_pi_tuple); 58 58 case BLK_INTEGRITY_CSUM_CRC: 59 59 case BLK_INTEGRITY_CSUM_IP: 60 - return bi->tuple_size == sizeof(struct t10_pi_tuple); 60 + return bi->metadata_size == sizeof(struct t10_pi_tuple); 61 61 default: 62 62 pr_warn_once("%s: unknown integrity checksum type:%d\n", 63 63 __func__, bi->csum_type);
+69 -1
block/blk-integrity.c
··· 13 13 #include <linux/scatterlist.h> 14 14 #include <linux/export.h> 15 15 #include <linux/slab.h> 16 + #include <linux/t10-pi.h> 16 17 17 18 #include "blk.h" 18 19 ··· 53 52 } 54 53 55 54 return segments; 55 + } 56 + 57 + int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, 58 + struct logical_block_metadata_cap __user *argp) 59 + { 60 + struct blk_integrity *bi = blk_get_integrity(bdev->bd_disk); 61 + struct logical_block_metadata_cap meta_cap = {}; 62 + size_t usize = _IOC_SIZE(cmd); 63 + 64 + if (_IOC_DIR(cmd) != _IOC_DIR(FS_IOC_GETLBMD_CAP) || 65 + _IOC_TYPE(cmd) != _IOC_TYPE(FS_IOC_GETLBMD_CAP) || 66 + _IOC_NR(cmd) != _IOC_NR(FS_IOC_GETLBMD_CAP) || 67 + _IOC_SIZE(cmd) < LBMD_SIZE_VER0) 68 + return -ENOIOCTLCMD; 69 + 70 + if (!bi) 71 + goto out; 72 + 73 + if (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) 74 + meta_cap.lbmd_flags |= LBMD_PI_CAP_INTEGRITY; 75 + if (bi->flags & BLK_INTEGRITY_REF_TAG) 76 + meta_cap.lbmd_flags |= LBMD_PI_CAP_REFTAG; 77 + meta_cap.lbmd_interval = 1 << bi->interval_exp; 78 + meta_cap.lbmd_size = bi->metadata_size; 79 + meta_cap.lbmd_pi_size = bi->pi_tuple_size; 80 + meta_cap.lbmd_pi_offset = bi->pi_offset; 81 + meta_cap.lbmd_opaque_size = bi->metadata_size - bi->pi_tuple_size; 82 + if (meta_cap.lbmd_opaque_size && !bi->pi_offset) 83 + meta_cap.lbmd_opaque_offset = bi->pi_tuple_size; 84 + 85 + switch (bi->csum_type) { 86 + case BLK_INTEGRITY_CSUM_NONE: 87 + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_NONE; 88 + break; 89 + case BLK_INTEGRITY_CSUM_IP: 90 + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_IP; 91 + break; 92 + case BLK_INTEGRITY_CSUM_CRC: 93 + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC16_T10DIF; 94 + break; 95 + case BLK_INTEGRITY_CSUM_CRC64: 96 + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC64_NVME; 97 + break; 98 + } 99 + 100 + if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE) 101 + meta_cap.lbmd_app_tag_size = 2; 102 + 103 + if (bi->flags & BLK_INTEGRITY_REF_TAG) { 104 + switch (bi->csum_type) { 105 + case 
BLK_INTEGRITY_CSUM_CRC64: 106 + meta_cap.lbmd_ref_tag_size = 107 + sizeof_field(struct crc64_pi_tuple, ref_tag); 108 + break; 109 + case BLK_INTEGRITY_CSUM_CRC: 110 + case BLK_INTEGRITY_CSUM_IP: 111 + meta_cap.lbmd_ref_tag_size = 112 + sizeof_field(struct t10_pi_tuple, ref_tag); 113 + break; 114 + default: 115 + break; 116 + } 117 + } 118 + 119 + out: 120 + return copy_struct_to_user(argp, usize, &meta_cap, sizeof(meta_cap), 121 + NULL); 56 122 } 57 123 58 124 /** ··· 307 239 { 308 240 struct blk_integrity *bi = dev_to_bi(dev); 309 241 310 - if (!bi->tuple_size) 242 + if (!bi->metadata_size) 311 243 return sysfs_emit(page, "none\n"); 312 244 return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi)); 313 245 }
+41 -3
block/blk-settings.c
··· 14 14 #include <linux/jiffies.h> 15 15 #include <linux/gfp.h> 16 16 #include <linux/dma-mapping.h> 17 + #include <linux/t10-pi.h> 18 + #include <linux/crc64.h> 17 19 18 20 #include "blk.h" 19 21 #include "blk-rq-qos.h" ··· 118 116 { 119 117 struct blk_integrity *bi = &lim->integrity; 120 118 121 - if (!bi->tuple_size) { 119 + if (!bi->metadata_size) { 122 120 if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE || 123 121 bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) { 124 122 pr_warn("invalid PI settings.\n"); ··· 137 135 (bi->flags & BLK_INTEGRITY_REF_TAG)) { 138 136 pr_warn("ref tag not support without checksum.\n"); 139 137 return -EINVAL; 138 + } 139 + 140 + if (bi->pi_tuple_size > bi->metadata_size) { 141 + pr_warn("pi_tuple_size (%u) exceeds metadata_size (%u)\n", 142 + bi->pi_tuple_size, 143 + bi->metadata_size); 144 + return -EINVAL; 145 + } 146 + 147 + switch (bi->csum_type) { 148 + case BLK_INTEGRITY_CSUM_NONE: 149 + if (bi->pi_tuple_size) { 150 + pr_warn("pi_tuple_size must be 0 when checksum type \ 151 + is none\n"); 152 + return -EINVAL; 153 + } 154 + break; 155 + case BLK_INTEGRITY_CSUM_CRC: 156 + case BLK_INTEGRITY_CSUM_IP: 157 + if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) { 158 + pr_warn("pi_tuple_size mismatch for T10 PI: expected \ 159 + %zu, got %u\n", 160 + sizeof(struct t10_pi_tuple), 161 + bi->pi_tuple_size); 162 + return -EINVAL; 163 + } 164 + break; 165 + case BLK_INTEGRITY_CSUM_CRC64: 166 + if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) { 167 + pr_warn("pi_tuple_size mismatch for CRC64 PI: \ 168 + expected %zu, got %u\n", 169 + sizeof(struct crc64_pi_tuple), 170 + bi->pi_tuple_size); 171 + return -EINVAL; 172 + } 173 + break; 140 174 } 141 175 142 176 if (!bi->interval_exp) ··· 929 891 return true; 930 892 931 893 if (ti->flags & BLK_INTEGRITY_STACKED) { 932 - if (ti->tuple_size != bi->tuple_size) 894 + if (ti->metadata_size != bi->metadata_size) 933 895 goto incompatible; 934 896 if (ti->interval_exp != 
bi->interval_exp) 935 897 goto incompatible; ··· 945 907 ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | 946 908 (bi->flags & BLK_INTEGRITY_REF_TAG); 947 909 ti->csum_type = bi->csum_type; 948 - ti->tuple_size = bi->tuple_size; 910 + ti->metadata_size = bi->metadata_size; 949 911 ti->pi_offset = bi->pi_offset; 950 912 ti->interval_exp = bi->interval_exp; 951 913 ti->tag_size = bi->tag_size;
+2 -1
block/ioctl.c
··· 13 13 #include <linux/uaccess.h> 14 14 #include <linux/pagemap.h> 15 15 #include <linux/io_uring/cmd.h> 16 + #include <linux/blk-integrity.h> 16 17 #include <uapi/linux/blkdev.h> 17 18 #include "blk.h" 18 19 #include "blk-crypto-internal.h" ··· 645 644 case IOC_PR_CLEAR: 646 645 return blkdev_pr_clear(bdev, mode, argp); 647 646 default: 648 - return -ENOIOCTLCMD; 647 + return blk_get_meta_cap(bdev, cmd, argp); 649 648 } 650 649 } 651 650
+8 -8
block/t10-pi.c
··· 56 56 pi->ref_tag = 0; 57 57 58 58 iter->data_buf += iter->interval; 59 - iter->prot_buf += bi->tuple_size; 59 + iter->prot_buf += bi->metadata_size; 60 60 iter->seed++; 61 61 } 62 62 } ··· 105 105 106 106 next: 107 107 iter->data_buf += iter->interval; 108 - iter->prot_buf += bi->tuple_size; 108 + iter->prot_buf += bi->metadata_size; 109 109 iter->seed++; 110 110 } 111 111 ··· 125 125 static void t10_pi_type1_prepare(struct request *rq) 126 126 { 127 127 struct blk_integrity *bi = &rq->q->limits.integrity; 128 - const int tuple_sz = bi->tuple_size; 128 + const int tuple_sz = bi->metadata_size; 129 129 u32 ref_tag = t10_pi_ref_tag(rq); 130 130 u8 offset = bi->pi_offset; 131 131 struct bio *bio; ··· 177 177 { 178 178 struct blk_integrity *bi = &rq->q->limits.integrity; 179 179 unsigned intervals = nr_bytes >> bi->interval_exp; 180 - const int tuple_sz = bi->tuple_size; 180 + const int tuple_sz = bi->metadata_size; 181 181 u32 ref_tag = t10_pi_ref_tag(rq); 182 182 u8 offset = bi->pi_offset; 183 183 struct bio *bio; ··· 234 234 put_unaligned_be48(0ULL, pi->ref_tag); 235 235 236 236 iter->data_buf += iter->interval; 237 - iter->prot_buf += bi->tuple_size; 237 + iter->prot_buf += bi->metadata_size; 238 238 iter->seed++; 239 239 } 240 240 } ··· 289 289 290 290 next: 291 291 iter->data_buf += iter->interval; 292 - iter->prot_buf += bi->tuple_size; 292 + iter->prot_buf += bi->metadata_size; 293 293 iter->seed++; 294 294 } 295 295 ··· 299 299 static void ext_pi_type1_prepare(struct request *rq) 300 300 { 301 301 struct blk_integrity *bi = &rq->q->limits.integrity; 302 - const int tuple_sz = bi->tuple_size; 302 + const int tuple_sz = bi->metadata_size; 303 303 u64 ref_tag = ext_pi_ref_tag(rq); 304 304 u8 offset = bi->pi_offset; 305 305 struct bio *bio; ··· 340 340 { 341 341 struct blk_integrity *bi = &rq->q->limits.integrity; 342 342 unsigned intervals = nr_bytes >> bi->interval_exp; 343 - const int tuple_sz = bi->tuple_size; 343 + const int tuple_sz = bi->metadata_size; 
344 344 u64 ref_tag = ext_pi_ref_tag(rq); 345 345 u8 offset = bi->pi_offset; 346 346 struct bio *bio;
+2 -2
drivers/md/dm-crypt.c
··· 1192 1192 return -EINVAL; 1193 1193 } 1194 1194 1195 - if (bi->tuple_size < cc->used_tag_size) { 1195 + if (bi->metadata_size < cc->used_tag_size) { 1196 1196 ti->error = "Integrity profile tag size mismatch."; 1197 1197 return -EINVAL; 1198 1198 } 1199 - cc->tuple_size = bi->tuple_size; 1199 + cc->tuple_size = bi->metadata_size; 1200 1200 if (1 << bi->interval_exp != cc->sector_size) { 1201 1201 ti->error = "Integrity profile sector size mismatch."; 1202 1202 return -EINVAL;
+6 -6
drivers/md/dm-integrity.c
··· 3906 3906 struct blk_integrity *bi = &limits->integrity; 3907 3907 3908 3908 memset(bi, 0, sizeof(*bi)); 3909 - bi->tuple_size = ic->tag_size; 3910 - bi->tag_size = bi->tuple_size; 3909 + bi->metadata_size = ic->tag_size; 3910 + bi->tag_size = bi->metadata_size; 3911 3911 bi->interval_exp = 3912 3912 ic->sb->log2_sectors_per_block + SECTOR_SHIFT; 3913 3913 } ··· 4746 4746 ti->error = "Integrity profile not supported"; 4747 4747 goto bad; 4748 4748 } 4749 - /*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/ 4750 - if (bi->tuple_size < ic->tag_size) { 4749 + /*printk("tag_size: %u, metadata_size: %u\n", bi->tag_size, bi->metadata_size);*/ 4750 + if (bi->metadata_size < ic->tag_size) { 4751 4751 r = -EINVAL; 4752 4752 ti->error = "The integrity profile is smaller than tag size"; 4753 4753 goto bad; 4754 4754 } 4755 - if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) { 4755 + if ((unsigned long)bi->metadata_size > PAGE_SIZE / 2) { 4756 4756 r = -EINVAL; 4757 4757 ti->error = "Too big tuple size"; 4758 4758 goto bad; 4759 4759 } 4760 - ic->tuple_size = bi->tuple_size; 4760 + ic->tuple_size = bi->metadata_size; 4761 4761 if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) { 4762 4762 r = -EINVAL; 4763 4763 ti->error = "Integrity profile sector size mismatch";
+1 -1
drivers/nvdimm/btt.c
··· 1506 1506 int rc; 1507 1507 1508 1508 if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { 1509 - lim.integrity.tuple_size = btt_meta_size(btt); 1509 + lim.integrity.metadata_size = btt_meta_size(btt); 1510 1510 lim.integrity.tag_size = btt_meta_size(btt); 1511 1511 } 1512 1512
+5 -2
drivers/nvme/host/core.c
··· 1870 1870 break; 1871 1871 } 1872 1872 1873 - bi->tuple_size = head->ms; 1874 - bi->pi_offset = info->pi_offset; 1873 + bi->metadata_size = head->ms; 1874 + if (bi->csum_type) { 1875 + bi->pi_tuple_size = head->pi_size; 1876 + bi->pi_offset = info->pi_offset; 1877 + } 1875 1878 return true; 1876 1879 } 1877 1880
+1 -1
drivers/nvme/target/io-cmd-bdev.c
··· 69 69 return; 70 70 71 71 if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) { 72 - ns->metadata_size = bi->tuple_size; 72 + ns->metadata_size = bi->metadata_size; 73 73 if (bi->flags & BLK_INTEGRITY_REF_TAG) 74 74 ns->pi_type = NVME_NS_DPS_PI_TYPE1; 75 75 else
+2 -1
drivers/scsi/sd_dif.c
··· 52 52 if (type != T10_PI_TYPE3_PROTECTION) 53 53 bi->flags |= BLK_INTEGRITY_REF_TAG; 54 54 55 - bi->tuple_size = sizeof(struct t10_pi_tuple); 55 + bi->metadata_size = sizeof(struct t10_pi_tuple); 56 + bi->pi_tuple_size = bi->metadata_size; 56 57 57 58 if (dif && type) { 58 59 bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+9 -2
include/linux/blk-integrity.h
··· 29 29 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); 30 30 int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, 31 31 ssize_t bytes); 32 + int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, 33 + struct logical_block_metadata_cap __user *argp); 32 34 33 35 static inline bool 34 36 blk_integrity_queue_supports_integrity(struct request_queue *q) 35 37 { 36 - return q->limits.integrity.tuple_size; 38 + return q->limits.integrity.metadata_size; 37 39 } 38 40 39 41 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) ··· 76 74 static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, 77 75 unsigned int sectors) 78 76 { 79 - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; 77 + return bio_integrity_intervals(bi, sectors) * bi->metadata_size; 80 78 } 81 79 82 80 static inline bool blk_integrity_rq(struct request *rq) ··· 94 92 rq->bio->bi_integrity->bip_iter); 95 93 } 96 94 #else /* CONFIG_BLK_DEV_INTEGRITY */ 95 + static inline int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, 96 + struct logical_block_metadata_cap __user *argp) 97 + { 98 + return -EOPNOTSUPP; 99 + } 97 100 static inline int blk_rq_count_integrity_sg(struct request_queue *q, 98 101 struct bio *b) 99 102 {
+2 -1
include/linux/blkdev.h
··· 116 116 struct blk_integrity { 117 117 unsigned char flags; 118 118 enum blk_integrity_checksum csum_type; 119 - unsigned char tuple_size; 119 + unsigned char metadata_size; 120 120 unsigned char pi_offset; 121 121 unsigned char interval_exp; 122 122 unsigned char tag_size; 123 + unsigned char pi_tuple_size; 123 124 }; 124 125 125 126 typedef unsigned int __bitwise blk_mode_t;
+59
include/uapi/linux/fs.h
··· 102 102 __u8 name[128]; 103 103 }; 104 104 105 + /* Protection info capability flags */ 106 + #define LBMD_PI_CAP_INTEGRITY (1 << 0) 107 + #define LBMD_PI_CAP_REFTAG (1 << 1) 108 + 109 + /* Checksum types for Protection Information */ 110 + #define LBMD_PI_CSUM_NONE 0 111 + #define LBMD_PI_CSUM_IP 1 112 + #define LBMD_PI_CSUM_CRC16_T10DIF 2 113 + #define LBMD_PI_CSUM_CRC64_NVME 4 114 + 115 + /* sizeof first published struct */ 116 + #define LBMD_SIZE_VER0 16 117 + 118 + /* 119 + * Logical block metadata capability descriptor 120 + * If the device does not support metadata, all the fields will be zero. 121 + * Applications must check lbmd_flags to determine whether metadata is 122 + * supported or not. 123 + */ 124 + struct logical_block_metadata_cap { 125 + /* Bitmask of logical block metadata capability flags */ 126 + __u32 lbmd_flags; 127 + /* 128 + * The amount of data described by each unit of logical block 129 + * metadata 130 + */ 131 + __u16 lbmd_interval; 132 + /* 133 + * Size in bytes of the logical block metadata associated with each 134 + * interval 135 + */ 136 + __u8 lbmd_size; 137 + /* 138 + * Size in bytes of the opaque block tag associated with each 139 + * interval 140 + */ 141 + __u8 lbmd_opaque_size; 142 + /* 143 + * Offset in bytes of the opaque block tag within the logical block 144 + * metadata 145 + */ 146 + __u8 lbmd_opaque_offset; 147 + /* Size in bytes of the T10 PI tuple associated with each interval */ 148 + __u8 lbmd_pi_size; 149 + /* Offset in bytes of T10 PI tuple within the logical block metadata */ 150 + __u8 lbmd_pi_offset; 151 + /* T10 PI guard tag type */ 152 + __u8 lbmd_guard_tag_type; 153 + /* Size in bytes of the T10 PI application tag */ 154 + __u8 lbmd_app_tag_size; 155 + /* Size in bytes of the T10 PI reference tag */ 156 + __u8 lbmd_ref_tag_size; 157 + /* Size in bytes of the T10 PI storage tag */ 158 + __u8 lbmd_storage_tag_size; 159 + __u8 pad; 160 + }; 161 + 105 162 /* extent-same (dedupe) ioctls; these MUST match 
the btrfs ioctl definitions */ 106 163 #define FILE_DEDUPE_RANGE_SAME 0 107 164 #define FILE_DEDUPE_RANGE_DIFFERS 1 ··· 315 258 * also /sys/kernel/debug/ for filesystems with debugfs exports 316 259 */ 317 260 #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) 261 + /* Get logical block metadata capability details */ 262 + #define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) 318 263 319 264 /* 320 265 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)