Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

erofs: lzma compression support

Add MicroLZMA support in order to maximize compression ratios for
specific scenarios. For example, it's useful for low-end embedded
boards and as a secondary algorithm in a file for specific access
patterns.

MicroLZMA is a new container format for raw LZMA1, which was created
by Lasse Collin aiming to minimize old LZMA headers and get rid of
unnecessary EOPM (end of payload marker) as well as to enable
fixed-sized output compression, especially for 4KiB pclusters.

Similar to LZ4, an in-place I/O approach is used to minimize the runtime
memory footprint when dealing with I/O. Overlapped decompression is
handled with 1) a bounce buffer for the data under processing or 2) extra
short-lived pages from the on-stack pagepool, which are shared within
the same read request (128KiB for example).

Link: https://lore.kernel.org/r/20211010213145.17462-8-xiang@kernel.org
Acked-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>

+383 -21
+16
fs/erofs/Kconfig
··· 82 82 Enable fixed-sized output compression for EROFS. 83 83 84 84 If you don't want to enable compression feature, say N. 85 + 86 + config EROFS_FS_ZIP_LZMA 87 + bool "EROFS LZMA compressed data support" 88 + depends on EROFS_FS_ZIP 89 + select XZ_DEC 90 + select XZ_DEC_MICROLZMA 91 + help 92 + Saying Y here includes support for reading EROFS file systems 93 + containing LZMA compressed data, specifically called MicroLZMA. It 94 + gives better compression ratios than the LZ4 algorithm, at the 95 + expense of more CPU overhead. 96 + 97 + LZMA support is an experimental feature for now and so most file 98 + systems will be readable without selecting this option. 99 + 100 + If unsure, say N.
+1
fs/erofs/Makefile
··· 4 4 erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o 5 5 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o 6 6 erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o 7 + erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
+16
fs/erofs/compress.h
··· 20 20 bool inplace_io, partial_decoding; 21 21 }; 22 22 23 + struct z_erofs_decompressor { 24 + int (*decompress)(struct z_erofs_decompress_req *rq, 25 + struct list_head *pagepool); 26 + char *name; 27 + }; 28 + 23 29 /* some special page->private (unsigned long, see below) */ 24 30 #define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2) 25 31 #define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2) ··· 81 75 return true; 82 76 } 83 77 78 + #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) 79 + static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, 80 + struct page *page) 81 + { 82 + return page->mapping == MNGD_MAPPING(sbi); 83 + } 84 + 84 85 int z_erofs_decompress(struct z_erofs_decompress_req *rq, 85 86 struct list_head *pagepool); 86 87 88 + /* prototypes for specific algorithms */ 89 + int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, 90 + struct list_head *pagepool); 87 91 #endif
+6 -6
fs/erofs/decompressor.c
··· 16 16 #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) 17 17 #endif 18 18 19 - struct z_erofs_decompressor { 20 - int (*decompress)(struct z_erofs_decompress_req *rq, 21 - struct list_head *pagepool); 22 - char *name; 23 - }; 24 - 25 19 int z_erofs_load_lz4_config(struct super_block *sb, 26 20 struct erofs_super_block *dsb, 27 21 struct z_erofs_lz4_cfgs *lz4, int size) ··· 343 349 .decompress = z_erofs_lz4_decompress, 344 350 .name = "lz4" 345 351 }, 352 + #ifdef CONFIG_EROFS_FS_ZIP_LZMA 353 + [Z_EROFS_COMPRESSION_LZMA] = { 354 + .decompress = z_erofs_lzma_decompress, 355 + .name = "lzma" 356 + }, 357 + #endif 346 358 }; 347 359 348 360 int z_erofs_decompress(struct z_erofs_decompress_req *rq,
+290
fs/erofs/decompressor_lzma.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + #include <linux/xz.h> 3 + #include <linux/module.h> 4 + #include "compress.h" 5 + 6 + struct z_erofs_lzma { 7 + struct z_erofs_lzma *next; 8 + struct xz_dec_microlzma *state; 9 + struct xz_buf buf; 10 + u8 bounce[PAGE_SIZE]; 11 + }; 12 + 13 + /* considering the LZMA performance, no need to use a lockless list for now */ 14 + static DEFINE_SPINLOCK(z_erofs_lzma_lock); 15 + static unsigned int z_erofs_lzma_max_dictsize; 16 + static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms; 17 + static struct z_erofs_lzma *z_erofs_lzma_head; 18 + static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq); 19 + 20 + module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444); 21 + 22 + void z_erofs_lzma_exit(void) 23 + { 24 + /* there should be no running fs instance */ 25 + while (z_erofs_lzma_avail_strms) { 26 + struct z_erofs_lzma *strm; 27 + 28 + spin_lock(&z_erofs_lzma_lock); 29 + strm = z_erofs_lzma_head; 30 + if (!strm) { 31 + spin_unlock(&z_erofs_lzma_lock); 32 + DBG_BUGON(1); 33 + return; 34 + } 35 + z_erofs_lzma_head = NULL; 36 + spin_unlock(&z_erofs_lzma_lock); 37 + 38 + while (strm) { 39 + struct z_erofs_lzma *n = strm->next; 40 + 41 + if (strm->state) 42 + xz_dec_microlzma_end(strm->state); 43 + kfree(strm); 44 + --z_erofs_lzma_avail_strms; 45 + strm = n; 46 + } 47 + } 48 + } 49 + 50 + int z_erofs_lzma_init(void) 51 + { 52 + unsigned int i; 53 + 54 + /* by default, use # of possible CPUs instead */ 55 + if (!z_erofs_lzma_nstrms) 56 + z_erofs_lzma_nstrms = num_possible_cpus(); 57 + 58 + for (i = 0; i < z_erofs_lzma_nstrms; ++i) { 59 + struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL); 60 + 61 + if (!strm) { 62 + z_erofs_lzma_exit(); 63 + return -ENOMEM; 64 + } 65 + spin_lock(&z_erofs_lzma_lock); 66 + strm->next = z_erofs_lzma_head; 67 + z_erofs_lzma_head = strm; 68 + spin_unlock(&z_erofs_lzma_lock); 69 + ++z_erofs_lzma_avail_strms; 70 + } 71 + return 0; 72 + } 73 + 74 + int 
z_erofs_load_lzma_config(struct super_block *sb, 75 + struct erofs_super_block *dsb, 76 + struct z_erofs_lzma_cfgs *lzma, int size) 77 + { 78 + static DEFINE_MUTEX(lzma_resize_mutex); 79 + unsigned int dict_size, i; 80 + struct z_erofs_lzma *strm, *head = NULL; 81 + int err; 82 + 83 + if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) { 84 + erofs_err(sb, "invalid lzma cfgs, size=%u", size); 85 + return -EINVAL; 86 + } 87 + if (lzma->format) { 88 + erofs_err(sb, "unidentified lzma format %x, please check kernel version", 89 + le16_to_cpu(lzma->format)); 90 + return -EINVAL; 91 + } 92 + dict_size = le32_to_cpu(lzma->dict_size); 93 + if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) { 94 + erofs_err(sb, "unsupported lzma dictionary size %u", 95 + dict_size); 96 + return -EINVAL; 97 + } 98 + 99 + erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!"); 100 + 101 + /* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */ 102 + mutex_lock(&lzma_resize_mutex); 103 + 104 + if (z_erofs_lzma_max_dictsize >= dict_size) { 105 + mutex_unlock(&lzma_resize_mutex); 106 + return 0; 107 + } 108 + 109 + /* 1. collect/isolate all streams for the following check */ 110 + for (i = 0; i < z_erofs_lzma_avail_strms; ++i) { 111 + struct z_erofs_lzma *last; 112 + 113 + again: 114 + spin_lock(&z_erofs_lzma_lock); 115 + strm = z_erofs_lzma_head; 116 + if (!strm) { 117 + spin_unlock(&z_erofs_lzma_lock); 118 + wait_event(z_erofs_lzma_wq, 119 + READ_ONCE(z_erofs_lzma_head)); 120 + goto again; 121 + } 122 + z_erofs_lzma_head = NULL; 123 + spin_unlock(&z_erofs_lzma_lock); 124 + 125 + for (last = strm; last->next; last = last->next) 126 + ++i; 127 + last->next = head; 128 + head = strm; 129 + } 130 + 131 + err = 0; 132 + /* 2. 
walk each isolated stream and grow max dict_size if needed */ 133 + for (strm = head; strm; strm = strm->next) { 134 + if (strm->state) 135 + xz_dec_microlzma_end(strm->state); 136 + strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size); 137 + if (!strm->state) 138 + err = -ENOMEM; 139 + } 140 + 141 + /* 3. push back all to the global list and update max dict_size */ 142 + spin_lock(&z_erofs_lzma_lock); 143 + DBG_BUGON(z_erofs_lzma_head); 144 + z_erofs_lzma_head = head; 145 + spin_unlock(&z_erofs_lzma_lock); 146 + 147 + z_erofs_lzma_max_dictsize = dict_size; 148 + mutex_unlock(&lzma_resize_mutex); 149 + return err; 150 + } 151 + 152 + int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, 153 + struct list_head *pagepool) 154 + { 155 + const unsigned int nrpages_out = 156 + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; 157 + const unsigned int nrpages_in = 158 + PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; 159 + unsigned int inputmargin, inlen, outlen, pageofs; 160 + struct z_erofs_lzma *strm; 161 + u8 *kin; 162 + bool bounced = false; 163 + int no, ni, j, err = 0; 164 + 165 + /* 1. get the exact LZMA compressed size */ 166 + kin = kmap(*rq->in); 167 + inputmargin = 0; 168 + while (!kin[inputmargin & ~PAGE_MASK]) 169 + if (!(++inputmargin & ~PAGE_MASK)) 170 + break; 171 + 172 + if (inputmargin >= PAGE_SIZE) { 173 + kunmap(*rq->in); 174 + return -EFSCORRUPTED; 175 + } 176 + rq->inputsize -= inputmargin; 177 + 178 + /* 2. get an available lzma context */ 179 + again: 180 + spin_lock(&z_erofs_lzma_lock); 181 + strm = z_erofs_lzma_head; 182 + if (!strm) { 183 + spin_unlock(&z_erofs_lzma_lock); 184 + wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head)); 185 + goto again; 186 + } 187 + z_erofs_lzma_head = strm->next; 188 + spin_unlock(&z_erofs_lzma_lock); 189 + 190 + /* 3. 
multi-call decompress */ 191 + inlen = rq->inputsize; 192 + outlen = rq->outputsize; 193 + xz_dec_microlzma_reset(strm->state, inlen, outlen, 194 + !rq->partial_decoding); 195 + pageofs = rq->pageofs_out; 196 + strm->buf.in = kin + inputmargin; 197 + strm->buf.in_pos = 0; 198 + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin); 199 + inlen -= strm->buf.in_size; 200 + strm->buf.out = NULL; 201 + strm->buf.out_pos = 0; 202 + strm->buf.out_size = 0; 203 + 204 + for (ni = 0, no = -1;;) { 205 + enum xz_ret xz_err; 206 + 207 + if (strm->buf.out_pos == strm->buf.out_size) { 208 + if (strm->buf.out) { 209 + kunmap(rq->out[no]); 210 + strm->buf.out = NULL; 211 + } 212 + 213 + if (++no >= nrpages_out || !outlen) { 214 + erofs_err(rq->sb, "decompressed buf out of bound"); 215 + err = -EFSCORRUPTED; 216 + break; 217 + } 218 + strm->buf.out_pos = 0; 219 + strm->buf.out_size = min_t(u32, outlen, 220 + PAGE_SIZE - pageofs); 221 + outlen -= strm->buf.out_size; 222 + if (rq->out[no]) 223 + strm->buf.out = kmap(rq->out[no]) + pageofs; 224 + pageofs = 0; 225 + } else if (strm->buf.in_pos == strm->buf.in_size) { 226 + kunmap(rq->in[ni]); 227 + 228 + if (++ni >= nrpages_in || !inlen) { 229 + erofs_err(rq->sb, "compressed buf out of bound"); 230 + err = -EFSCORRUPTED; 231 + break; 232 + } 233 + strm->buf.in_pos = 0; 234 + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE); 235 + inlen -= strm->buf.in_size; 236 + kin = kmap(rq->in[ni]); 237 + strm->buf.in = kin; 238 + bounced = false; 239 + } 240 + 241 + /* 242 + * Handle overlapping: Use bounced buffer if the compressed 243 + * data is under processing; Otherwise, Use short-lived pages 244 + * from the on-stack pagepool where pages share with the same 245 + * request. 
246 + */ 247 + if (!bounced && rq->out[no] == rq->in[ni]) { 248 + memcpy(strm->bounce, strm->buf.in, strm->buf.in_size); 249 + strm->buf.in = strm->bounce; 250 + bounced = true; 251 + } 252 + for (j = ni + 1; j < nrpages_in; ++j) { 253 + struct page *tmppage; 254 + 255 + if (rq->out[no] != rq->in[j]) 256 + continue; 257 + 258 + DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), 259 + rq->in[j])); 260 + tmppage = erofs_allocpage(pagepool, 261 + GFP_KERNEL | __GFP_NOFAIL); 262 + set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); 263 + copy_highpage(tmppage, rq->in[j]); 264 + rq->in[j] = tmppage; 265 + } 266 + xz_err = xz_dec_microlzma_run(strm->state, &strm->buf); 267 + DBG_BUGON(strm->buf.out_pos > strm->buf.out_size); 268 + DBG_BUGON(strm->buf.in_pos > strm->buf.in_size); 269 + 270 + if (xz_err != XZ_OK) { 271 + if (xz_err == XZ_STREAM_END && !outlen) 272 + break; 273 + erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]", 274 + xz_err, rq->inputsize, rq->outputsize); 275 + err = -EFSCORRUPTED; 276 + break; 277 + } 278 + } 279 + if (no < nrpages_out && strm->buf.out) 280 + kunmap(rq->in[no]); 281 + if (ni < nrpages_in) 282 + kunmap(rq->in[ni]); 283 + /* 4. push back LZMA stream context to the global list */ 284 + spin_lock(&z_erofs_lzma_lock); 285 + strm->next = z_erofs_lzma_head; 286 + z_erofs_lzma_head = strm; 287 + spin_unlock(&z_erofs_lzma_lock); 288 + wake_up(&z_erofs_lzma_wq); 289 + return err; 290 + }
+12 -2
fs/erofs/erofs_fs.h
··· 264 264 265 265 /* available compression algorithm types (for h_algorithmtype) */ 266 266 enum { 267 - Z_EROFS_COMPRESSION_LZ4 = 0, 267 + Z_EROFS_COMPRESSION_LZ4 = 0, 268 + Z_EROFS_COMPRESSION_LZMA = 1, 268 269 Z_EROFS_COMPRESSION_MAX 269 270 }; 270 - #define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1)) 271 + #define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1) 271 272 272 273 /* 14 bytes (+ length field = 16 bytes) */ 273 274 struct z_erofs_lz4_cfgs { ··· 276 275 __le16 max_pclusterblks; 277 276 u8 reserved[10]; 278 277 } __packed; 278 + 279 + /* 14 bytes (+ length field = 16 bytes) */ 280 + struct z_erofs_lzma_cfgs { 281 + __le32 dict_size; 282 + __le16 format; 283 + u8 reserved[8]; 284 + } __packed; 285 + 286 + #define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) 279 287 280 288 /* 281 289 * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+22
fs/erofs/internal.h
··· 407 407 * approach instead if possible since it's more metadata lightweight.) 408 408 */ 409 409 #define EROFS_GET_BLOCKS_FIEMAP 0x0002 410 + /* Used to map the whole extent if non-negligible data is requested for LZMA */ 411 + #define EROFS_GET_BLOCKS_READMORE 0x0004 410 412 411 413 enum { 412 414 Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, ··· 533 531 { 534 532 if (lz4 || dsb->u1.lz4_max_distance) { 535 533 erofs_err(sb, "lz4 algorithm isn't enabled"); 534 + return -EINVAL; 535 + } 536 + return 0; 537 + } 538 + #endif /* !CONFIG_EROFS_FS_ZIP */ 539 + 540 + #ifdef CONFIG_EROFS_FS_ZIP_LZMA 541 + int z_erofs_lzma_init(void); 542 + void z_erofs_lzma_exit(void); 543 + int z_erofs_load_lzma_config(struct super_block *sb, 544 + struct erofs_super_block *dsb, 545 + struct z_erofs_lzma_cfgs *lzma, int size); 546 + #else 547 + static inline int z_erofs_lzma_init(void) { return 0; } 548 + static inline int z_erofs_lzma_exit(void) { return 0; } 549 + static inline int z_erofs_load_lzma_config(struct super_block *sb, 550 + struct erofs_super_block *dsb, 551 + struct z_erofs_lzma_cfgs *lzma, int size) { 552 + if (lzma) { 553 + erofs_err(sb, "lzma algorithm isn't enabled"); 536 554 return -EINVAL; 537 555 } 538 556 return 0;
+14 -3
fs/erofs/super.c
··· 225 225 case Z_EROFS_COMPRESSION_LZ4: 226 226 ret = z_erofs_load_lz4_config(sb, dsb, data, size); 227 227 break; 228 + case Z_EROFS_COMPRESSION_LZMA: 229 + ret = z_erofs_load_lzma_config(sb, dsb, data, size); 230 + break; 228 231 default: 229 232 DBG_BUGON(1); 230 233 ret = -EFAULT; ··· 843 840 if (err) 844 841 goto shrinker_err; 845 842 843 + err = z_erofs_lzma_init(); 844 + if (err) 845 + goto lzma_err; 846 + 846 847 erofs_pcpubuf_init(); 847 848 err = z_erofs_init_zip_subsystem(); 848 849 if (err) ··· 861 854 fs_err: 862 855 z_erofs_exit_zip_subsystem(); 863 856 zip_err: 857 + z_erofs_lzma_exit(); 858 + lzma_err: 864 859 erofs_exit_shrinker(); 865 860 shrinker_err: 866 861 kmem_cache_destroy(erofs_inode_cachep); ··· 873 864 static void __exit erofs_module_exit(void) 874 865 { 875 866 unregister_filesystem(&erofs_fs_type); 876 - z_erofs_exit_zip_subsystem(); 877 - erofs_exit_shrinker(); 878 867 879 - /* Ensure all RCU free inodes are safe before cache is destroyed. */ 868 + /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ 880 869 rcu_barrier(); 870 + 871 + z_erofs_exit_zip_subsystem(); 872 + z_erofs_lzma_exit(); 873 + erofs_exit_shrinker(); 881 874 kmem_cache_destroy(erofs_inode_cachep); 882 875 erofs_pcpubuf_exit(); 883 876 }
+2 -2
fs/erofs/zdata.c
··· 1404 1404 1405 1405 if (backmost) { 1406 1406 map->m_la = end; 1407 - /* TODO: pass in EROFS_GET_BLOCKS_READMORE for LZMA later */ 1408 - err = z_erofs_map_blocks_iter(inode, map, 0); 1407 + err = z_erofs_map_blocks_iter(inode, map, 1408 + EROFS_GET_BLOCKS_READMORE); 1409 1409 if (err) 1410 1410 return; 1411 1411
-7
fs/erofs/zdata.h
··· 94 94 } u; 95 95 }; 96 96 97 - #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) 98 - static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, 99 - struct page *page) 100 - { 101 - return page->mapping == MNGD_MAPPING(sbi); 102 - } 103 - 104 97 #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 105 98 #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) 106 99 #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
+4 -1
fs/erofs/zmap.c
··· 672 672 else 673 673 map->m_algorithmformat = vi->z_algorithmtype[0]; 674 674 675 - if (flags & EROFS_GET_BLOCKS_FIEMAP) { 675 + if ((flags & EROFS_GET_BLOCKS_FIEMAP) || 676 + ((flags & EROFS_GET_BLOCKS_READMORE) && 677 + map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA && 678 + map->m_llen >= EROFS_BLKSIZ)) { 676 679 err = z_erofs_get_extent_decompressedlen(&m); 677 680 if (!err) 678 681 map->m_flags |= EROFS_MAP_FULL_MAPPED;