Linux kernel mirror: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

squashfs: cache partial compressed blocks

Before commit 93e72b3c612adcaca1 ("squashfs: migrate from ll_rw_block
usage to BIO"), compressed blocks read by squashfs were cached in the page
cache, but that is not the case after that commit. That has led to
squashfs having to re-read a lot of sectors from disk/flash.

For example, the first sectors of every metadata block are read twice
from the disk: once to read the block length, and a second time to read
the block itself. Also, in linear reads of large files, the last
sectors of one data block are re-read from disk when reading the next data
block, since the compressed blocks are of variable sizes and not aligned
to device blocks. This extra I/O degrades read performance by, for
example, ~16% in one scenario on my ARM platform using squashfs with
dm-verity and NAND.
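
As an illustration (not part of the patch), the rounding below mirrors
the read_start/read_end computation in squashfs_bio_read(); the offsets
and sizes are invented values, chosen so that the tail sector of one
block read is the head sector of the next:

/* Hypothetical userspace sketch of the overlap that caused re-reads. */
#include <stdint.h>
#include <stdio.h>

#define DEVBLKSIZE 4096ULL	/* assume devblksize == PAGE_SIZE */

static uint64_t rdown(uint64_t x, uint64_t m) { return x - (x % m); }
static uint64_t rup(uint64_t x, uint64_t m) { return rdown(x + m - 1, m); }

int main(void)
{
	/* Two adjacent compressed blocks in a linear read (made-up sizes). */
	uint64_t a_index = 10000, a_len = 30000;
	uint64_t b_index = a_index + a_len;	/* B starts where A ends */
	uint64_t b_len = 20000;

	uint64_t a_end = rup(a_index + a_len, DEVBLKSIZE);
	uint64_t b_start = rdown(b_index, DEVBLKSIZE);

	printf("read A covers [%llu, %llu)\n",
	       (unsigned long long)rdown(a_index, DEVBLKSIZE),
	       (unsigned long long)a_end);
	printf("read B covers [%llu, %llu)\n",
	       (unsigned long long)b_start,
	       (unsigned long long)rup(b_index + b_len, DEVBLKSIZE));
	/* Without caching, these bytes are read from the device twice. */
	printf("overlap re-read: %llu bytes\n",
	       (unsigned long long)(a_end - b_start));
	return 0;
}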

Since the decompressed data is cached in the page cache or squashfs'
internal metadata and fragment caches, caching _all_ compressed pages
would lead to a lot of double caching and is undesirable. But make the
code cache any disk blocks which were only partially requested, since
these are the ones likely to include data which is needed by other file
system blocks. This restores read performance in my test scenario.
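
A minimal userspace sketch of that policy, reusing the head/tail test
the patch adds in squashfs_bio_read_cached(); the request values here
are invented for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	/* Invented request: 30000 bytes at offset 10000 in the image. */
	uint64_t index = 10000, length = 30000;
	/* Page-aligned I/O window, as computed by squashfs_bio_read(). */
	uint64_t read_start = 8192, read_end = 40960;
	int page_count = (int)((read_end - read_start) / PAGE_SIZE);

	for (int idx = 0; idx < page_count; idx++) {
		bool head = (idx == 0 && index != read_start);
		bool tail = (idx == page_count - 1 &&
			     index + length != read_end);

		if (head || tail)
			printf("page %d: partially requested, cache it\n", idx);
		else
			printf("page %d: fully requested, don't cache\n", idx);
	}
	return 0;
}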

The compressed block caching is only applied when the disk block size is
equal to the page size, to avoid having to deal with caching sub-page
reads.

[akpm@linux-foundation.org: fs/squashfs/block.c needs linux/pagemap.h]
[vincent.whitchurch@axis.com: fix page update race]
Link: https://lkml.kernel.org/r/20230526-squashfs-cache-fixup-v1-1-d54a7fa23e7b@axis.com
[vincent.whitchurch@axis.com: fix page indices]
Link: https://lkml.kernel.org/r/20230526-squashfs-cache-fixup-v1-2-d54a7fa23e7b@axis.com
[akpm@linux-foundation.org: fix layout, per hch]
Link: https://lkml.kernel.org/r/20230510-squashfs-cache-v4-1-3bd394e1ee71@axis.com
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Phillip Lougher <phillip@squashfs.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

3 files changed, 129 insertions(+), 6 deletions(-)

fs/squashfs/block.c (+111 -6)
···
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/bio.h>
···
 	return copied_bytes;
 }
 
+static int squashfs_bio_read_cached(struct bio *fullbio,
+		struct address_space *cache_mapping, u64 index, int length,
+		u64 read_start, u64 read_end, int page_count)
+{
+	struct page *head_to_cache = NULL, *tail_to_cache = NULL;
+	struct block_device *bdev = fullbio->bi_bdev;
+	int start_idx = 0, end_idx = 0;
+	struct bvec_iter_all iter_all;
+	struct bio *bio = NULL;
+	struct bio_vec *bv;
+	int idx = 0;
+	int err = 0;
+
+	bio_for_each_segment_all(bv, fullbio, iter_all) {
+		struct page *page = bv->bv_page;
+
+		if (page->mapping == cache_mapping) {
+			idx++;
+			continue;
+		}
+
+		/*
+		 * We only use this when the device block size is the same as
+		 * the page size, so read_start and read_end cover full pages.
+		 *
+		 * Compare these to the original required index and length to
+		 * only cache pages which were requested partially, since these
+		 * are the ones which are likely to be needed when reading
+		 * adjacent blocks.
+		 */
+		if (idx == 0 && index != read_start)
+			head_to_cache = page;
+		else if (idx == page_count - 1 && index + length != read_end)
+			tail_to_cache = page;
+
+		if (!bio || idx != end_idx) {
+			struct bio *new = bio_alloc_clone(bdev, fullbio,
+							  GFP_NOIO, &fs_bio_set);
+
+			if (bio) {
+				bio_trim(bio, start_idx * PAGE_SECTORS,
+					 (end_idx - start_idx) * PAGE_SECTORS);
+				bio_chain(bio, new);
+				submit_bio(bio);
+			}
+
+			bio = new;
+			start_idx = idx;
+		}
+
+		idx++;
+		end_idx = idx;
+	}
+
+	if (bio) {
+		bio_trim(bio, start_idx * PAGE_SECTORS,
+			 (end_idx - start_idx) * PAGE_SECTORS);
+		err = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+
+	if (err)
+		return err;
+
+	if (head_to_cache) {
+		int ret = add_to_page_cache_lru(head_to_cache, cache_mapping,
+						read_start >> PAGE_SHIFT,
+						GFP_NOIO);
+
+		if (!ret) {
+			SetPageUptodate(head_to_cache);
+			unlock_page(head_to_cache);
+		}
+	}
+
+	if (tail_to_cache) {
+		int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping,
+						(read_end >> PAGE_SHIFT) - 1,
+						GFP_NOIO);
+
+		if (!ret) {
+			SetPageUptodate(tail_to_cache);
+			unlock_page(tail_to_cache);
+		}
+	}
+
+	return 0;
+}
+
 static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 		struct bio **biop, int *block_offset)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	struct address_space *cache_mapping = msblk->cache_mapping;
 	const u64 read_start = round_down(index, msblk->devblksize);
 	const sector_t block = read_start >> msblk->devblksize_log2;
 	const u64 read_end = round_up(index + length, msblk->devblksize);
···
 	for (i = 0; i < page_count; ++i) {
 		unsigned int len =
 			min_t(unsigned int, PAGE_SIZE - offset, total_len);
-		struct page *page = alloc_page(GFP_NOIO);
+		struct page *page = NULL;
+
+		if (cache_mapping)
+			page = find_get_page(cache_mapping,
+					     (read_start >> PAGE_SHIFT) + i);
+		if (!page)
+			page = alloc_page(GFP_NOIO);
 
 		if (!page) {
 			error = -ENOMEM;
 			goto out_free_bio;
 		}
-		if (!bio_add_page(bio, page, len, offset)) {
-			error = -EIO;
-			goto out_free_bio;
-		}
+
+		/*
+		 * Use the __ version to avoid merging since we need each page
+		 * to be separate when we check for and avoid cached pages.
+		 */
+		__bio_add_page(bio, page, len, offset);
 		offset = 0;
 		total_len -= len;
 	}
 
-	error = submit_bio_wait(bio);
+	if (cache_mapping)
+		error = squashfs_bio_read_cached(bio, cache_mapping, index,
+						 length, read_start, read_end,
+						 page_count);
+	else
+		error = submit_bio_wait(bio);
 	if (error)
 		goto out_free_bio;
 
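
The read path above avoids re-reading pages it found in the cache by
submitting only the contiguous runs of uncached pages, carving them out
of the full bio with bio_alloc_clone()/bio_trim()/bio_chain(). Below is
a hedged userspace sketch of just the run-splitting logic, where hit[]
is an invented stand-in for the page->mapping == cache_mapping check:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	/* true = page was already found in the cache (invented pattern) */
	bool hit[8] = { true, false, false, true, false, false, false, true };
	int start_idx = 0, end_idx = 0;
	bool open_run = false;

	for (int idx = 0; idx < 8; idx++) {
		if (hit[idx])
			continue;	/* cached page: skip the I/O */
		if (!open_run || idx != end_idx) {
			if (open_run)	/* close the previous run */
				printf("submit pages [%d, %d)\n",
				       start_idx, end_idx);
			start_idx = idx;	/* start a new run */
			open_run = true;
		}
		end_idx = idx + 1;	/* run now ends past this page */
	}
	if (open_run)
		printf("submit pages [%d, %d)\n", start_idx, end_idx);
	return 0;
}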

fs/squashfs/squashfs_fs_sb.h (+1)

···
 	struct squashfs_cache *block_cache;
 	struct squashfs_cache *fragment_cache;
 	struct squashfs_cache *read_page;
+	struct address_space *cache_mapping;
 	int next_meta_index;
 	__le64 *id_table;
 	__le64 *fragment_index;

fs/squashfs/super.c (+17)

···
 		goto failed_mount;
 	}
 
+	if (msblk->devblksize == PAGE_SIZE) {
+		struct inode *cache = new_inode(sb);
+
+		if (cache == NULL)
+			goto failed_mount;
+
+		set_nlink(cache, 1);
+		cache->i_size = OFFSET_MAX;
+		mapping_set_gfp_mask(cache->i_mapping, GFP_NOFS);
+
+		msblk->cache_mapping = cache->i_mapping;
+	}
+
 	msblk->stream = squashfs_decompressor_setup(sb, flags);
 	if (IS_ERR(msblk->stream)) {
 		err = PTR_ERR(msblk->stream);
···
 	squashfs_cache_delete(msblk->block_cache);
 	squashfs_cache_delete(msblk->fragment_cache);
 	squashfs_cache_delete(msblk->read_page);
+	if (msblk->cache_mapping)
+		iput(msblk->cache_mapping->host);
 	msblk->thread_ops->destroy(msblk);
 	kfree(msblk->inode_lookup_table);
 	kfree(msblk->fragment_index);
···
 	squashfs_cache_delete(sbi->block_cache);
 	squashfs_cache_delete(sbi->fragment_cache);
 	squashfs_cache_delete(sbi->read_page);
+	if (sbi->cache_mapping)
+		iput(sbi->cache_mapping->host);
 	sbi->thread_ops->destroy(sbi);
 	kfree(sbi->id_table);
 	kfree(sbi->fragment_index);