Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: make btrfs_csum_one_bio() handle bs > ps without large folios

For bs > ps (block size larger than page size) cases, all folios passed
into btrfs_csum_one_bio() are guaranteed to be backed by large folios.
But that requirement excludes features like direct IO and encoded writes.

To support bs > ps without large folios, enhance btrfs_csum_one_bio()
by:

- Split btrfs_calculate_block_csum() into two versions
* btrfs_calculate_block_csum_folio()
For call sites where a fs block is always backed by a large folio.

This will do extra checks on the folio size, build a paddrs[] array,
and pass it into the newer btrfs_calculate_block_csum_pages()
helper.

For now btrfs_check_block_csum() is still using this version.

* btrfs_calculate_block_csum_pages()
For call sites that may hit a fs block backed by noncontiguous pages.
The pages are represented by paddrs[] array, which includes the
offset inside the page.

This function will do the proper sub-block handling.

- Make btrfs_csum_one_bio() use btrfs_calculate_block_csum_pages()
This means we will need to build a local paddrs[] array, and after
filling a fs block, do the checksum calculation.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by

Qu Wenruo and committed by
David Sterba
62bcbdca fe1e5003

+68 -26
+4 -2
fs/btrfs/btrfs_inode.h
··· 543 543 #endif 544 544 } 545 545 546 - void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, 547 - u8 *dest); 546 + void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info, 547 + const phys_addr_t paddr, u8 *dest); 548 + void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info, 549 + const phys_addr_t paddrs[], u8 *dest); 548 550 int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, 549 551 const u8 * const csum_expected); 550 552 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
+12 -3
fs/btrfs/file-item.c
··· 775 775 struct bvec_iter iter = *src; 776 776 phys_addr_t paddr; 777 777 const u32 blocksize = fs_info->sectorsize; 778 + const u32 step = min(blocksize, PAGE_SIZE); 779 + const u32 nr_steps = blocksize / step; 780 + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; 781 + u32 offset = 0; 778 782 int index = 0; 779 783 780 784 shash->tfm = fs_info->csum_shash; 781 785 782 - btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) { 783 - btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index); 784 - index += fs_info->csum_size; 786 + btrfs_bio_for_each_block(paddr, bio, &iter, step) { 787 + paddrs[(offset / step) % nr_steps] = paddr; 788 + offset += step; 789 + 790 + if (IS_ALIGNED(offset, blocksize)) { 791 + btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index); 792 + index += fs_info->csum_size; 793 + } 785 794 } 786 795 } 787 796
+52 -21
fs/btrfs/inode.c
··· 3343 3343 return btrfs_finish_one_ordered(ordered); 3344 3344 } 3345 3345 3346 - void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, 3347 - u8 *dest) 3346 + /* 3347 + * Calculate the checksum of an fs block at physical memory address @paddr, 3348 + * and save the result to @dest. 3349 + * 3350 + * The folio containing @paddr must be large enough to contain a full fs block. 3351 + */ 3352 + void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info, 3353 + const phys_addr_t paddr, u8 *dest) 3348 3354 { 3349 3355 struct folio *folio = page_folio(phys_to_page(paddr)); 3350 3356 const u32 blocksize = fs_info->sectorsize; 3351 - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 3357 + const u32 step = min(blocksize, PAGE_SIZE); 3358 + const u32 nr_steps = blocksize / step; 3359 + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; 3352 3360 3353 - shash->tfm = fs_info->csum_shash; 3354 3361 /* The full block must be inside the folio. */ 3355 3362 ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); 3356 3363 3357 - if (folio_test_partial_kmap(folio)) { 3358 - size_t cur = paddr; 3364 + for (int i = 0; i < nr_steps; i++) { 3365 + u32 pindex = offset_in_folio(folio, paddr + i * step) >> PAGE_SHIFT; 3359 3366 3360 - crypto_shash_init(shash); 3361 - while (cur < paddr + blocksize) { 3362 - void *kaddr; 3363 - size_t len = min(paddr + blocksize - cur, 3364 - PAGE_SIZE - offset_in_page(cur)); 3365 - 3366 - kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur)); 3367 - crypto_shash_update(shash, kaddr, len); 3368 - kunmap_local(kaddr); 3369 - cur += len; 3370 - } 3371 - crypto_shash_final(shash, dest); 3372 - } else { 3373 - crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest); 3367 + /* 3368 + * For bs <= ps cases, we will only run the loop once, so the offset 3369 + * inside the page will only added to paddrs[0]. 
3370 + * 3371 + * For bs > ps cases, the block must be page aligned, thus offset 3372 + * inside the page will always be 0. 3373 + */ 3374 + paddrs[i] = page_to_phys(folio_page(folio, pindex)) + offset_in_page(paddr); 3374 3375 } 3376 + return btrfs_calculate_block_csum_pages(fs_info, paddrs, dest); 3375 3377 } 3378 + 3379 + /* 3380 + * Calculate the checksum of a fs block backed by multiple noncontiguous pages 3381 + * at @paddrs[] and save the result to @dest. 3382 + * 3383 + * The folio containing @paddr must be large enough to contain a full fs block. 3384 + */ 3385 + void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info, 3386 + const phys_addr_t paddrs[], u8 *dest) 3387 + { 3388 + const u32 blocksize = fs_info->sectorsize; 3389 + const u32 step = min(blocksize, PAGE_SIZE); 3390 + const u32 nr_steps = blocksize / step; 3391 + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 3392 + 3393 + shash->tfm = fs_info->csum_shash; 3394 + crypto_shash_init(shash); 3395 + for (int i = 0; i < nr_steps; i++) { 3396 + const phys_addr_t paddr = paddrs[i]; 3397 + void *kaddr; 3398 + 3399 + ASSERT(offset_in_page(paddr) + step <= PAGE_SIZE); 3400 + kaddr = kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr); 3401 + crypto_shash_update(shash, kaddr, step); 3402 + kunmap_local(kaddr); 3403 + } 3404 + crypto_shash_final(shash, dest); 3405 + } 3406 + 3376 3407 /* 3377 3408 * Verify the checksum for a single sector without any extra action that depend 3378 3409 * on the type of I/O. ··· 3413 3382 int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, 3414 3383 const u8 * const csum_expected) 3415 3384 { 3416 - btrfs_calculate_block_csum(fs_info, paddr, csum); 3385 + btrfs_calculate_block_csum_folio(fs_info, paddr, csum); 3417 3386 if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0)) 3418 3387 return -EIO; 3419 3388 return 0;