Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: enable encoded read/write/send for bs > ps cases

Since read verification and read repair both support bs > ps
without large folios now, we can enable encoded read/write/send.

Now we can relax the alignment in assert_bbio_alignment() to
min(blocksize, PAGE_SIZE).
But also add the extra blocksize based alignment check for the logical
and length of the bbio.

There is a pitfall in btrfs_add_compressed_bio_folios(), which relies on
the folios passed in to meet the minimal folio order.
But now we can pass in regular page-sized folios, so update it to check
each folio's size instead of using the minimal folio size.

This allows btrfs_add_compressed_bio_folios() to even handle folio arrays
with different sizes; thankfully we don't yet need to handle such a crazy
situation.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by

Qu Wenruo and committed by
David Sterba
ec207990 052fd7a5

+17 -44
+12 -10
fs/btrfs/bio.c
··· 867 867 struct bio_vec bvec; 868 868 struct bvec_iter iter; 869 869 const u32 blocksize = fs_info->sectorsize; 870 + const u32 alignment = min(blocksize, PAGE_SIZE); 871 + const u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; 872 + const u32 length = bbio->bio.bi_iter.bi_size; 870 873 871 - /* Metadata has no extra bs > ps alignment requirement. */ 872 - if (!is_data_bbio(bbio)) 873 - return; 874 + /* The logical and length should still be aligned to blocksize. */ 875 + ASSERT(IS_ALIGNED(logical, blocksize) && IS_ALIGNED(length, blocksize) && 876 + length != 0, "root=%llu inode=%llu logical=%llu length=%u", 877 + btrfs_root_id(bbio->inode->root), 878 + btrfs_ino(bbio->inode), logical, length); 874 879 875 880 bio_for_each_bvec(bvec, &bbio->bio, iter) 876 - ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) && 877 - IS_ALIGNED(bvec.bv_len, blocksize), 881 + ASSERT(IS_ALIGNED(bvec.bv_offset, alignment) && 882 + IS_ALIGNED(bvec.bv_len, alignment), 878 883 "root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u", 879 884 btrfs_root_id(bbio->inode->root), 880 - btrfs_ino(bbio->inode), 881 - bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT, 882 - bbio->bio.bi_iter.bi_size, iter.bi_idx, 883 - bvec.bv_offset, 884 - bvec.bv_len); 885 + btrfs_ino(bbio->inode), logical, length, iter.bi_idx, 886 + bvec.bv_offset, bvec.bv_len); 885 887 #endif 886 888 } 887 889
+4 -5
fs/btrfs/compression.c
··· 340 340 341 341 static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb) 342 342 { 343 - struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; 344 343 struct bio *bio = &cb->bbio.bio; 345 344 u32 offset = 0; 345 + unsigned int findex = 0; 346 346 347 347 while (offset < cb->compressed_len) { 348 - struct folio *folio; 348 + struct folio *folio = cb->compressed_folios[findex]; 349 + u32 len = min_t(u32, cb->compressed_len - offset, folio_size(folio)); 349 350 int ret; 350 - u32 len = min_t(u32, cb->compressed_len - offset, 351 - btrfs_min_folio_size(fs_info)); 352 351 353 - folio = cb->compressed_folios[offset >> (PAGE_SHIFT + fs_info->block_min_order)]; 354 352 /* Maximum compressed extent is smaller than bio size limit. */ 355 353 ret = bio_add_folio(bio, folio, len, 0); 356 354 ASSERT(ret); 357 355 offset += len; 356 + findex++; 358 357 } 359 358 } 360 359
-21
fs/btrfs/ioctl.c
··· 4408 4408 goto out_acct; 4409 4409 } 4410 4410 4411 - if (fs_info->sectorsize > PAGE_SIZE) { 4412 - ret = -ENOTTY; 4413 - goto out_acct; 4414 - } 4415 4411 if (compat) { 4416 4412 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 4417 4413 struct btrfs_ioctl_encoded_io_args_32 args32; ··· 4499 4503 4500 4504 static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat) 4501 4505 { 4502 - struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode); 4503 4506 struct btrfs_ioctl_encoded_io_args args; 4504 4507 struct iovec iovstack[UIO_FASTIOV]; 4505 4508 struct iovec *iov = iovstack; ··· 4509 4514 4510 4515 if (!capable(CAP_SYS_ADMIN)) { 4511 4516 ret = -EPERM; 4512 - goto out_acct; 4513 - } 4514 - 4515 - if (fs_info->sectorsize > PAGE_SIZE) { 4516 - ret = -ENOTTY; 4517 4517 goto out_acct; 4518 4518 } 4519 4519 ··· 4793 4803 ret = -EPERM; 4794 4804 goto out_acct; 4795 4805 } 4796 - if (fs_info->sectorsize > PAGE_SIZE) { 4797 - ret = -ENOTTY; 4798 - goto out_acct; 4799 - } 4800 - 4801 4806 sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); 4802 4807 4803 4808 if (issue_flags & IO_URING_F_COMPAT) { ··· 4920 4935 static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags) 4921 4936 { 4922 4937 struct file *file = cmd->file; 4923 - struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode); 4924 4938 loff_t pos; 4925 4939 struct kiocb kiocb; 4926 4940 ssize_t ret; ··· 4934 4950 ret = -EPERM; 4935 4951 goto out_acct; 4936 4952 } 4937 - if (fs_info->sectorsize > PAGE_SIZE) { 4938 - ret = -ENOTTY; 4939 - goto out_acct; 4940 - } 4941 - 4942 4953 sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); 4943 4954 4944 4955 if (!(file->f_mode & FMODE_WRITE)) {
+1 -8
fs/btrfs/send.c
··· 5634 5634 5635 5635 ei = btrfs_item_ptr(leaf, path->slots[0], 5636 5636 struct btrfs_file_extent_item); 5637 - /* 5638 - * Do not go through encoded read for bs > ps cases. 5639 - * 5640 - * Encoded send is using vmallocated pages as buffer, which we can 5641 - * not ensure every folio is large enough to contain a block. 5642 - */ 5643 - if (sctx->send_root->fs_info->sectorsize <= PAGE_SIZE && 5644 - (sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && 5637 + if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && 5645 5638 btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) { 5646 5639 bool is_inline = (btrfs_file_extent_type(leaf, ei) == 5647 5640 BTRFS_FILE_EXTENT_INLINE);