erofs: lzma compression support · tjh.dev/kernel@622cead

+16

fs/erofs/Kconfig

··· 82 82 Enable fixed-sized output compression for EROFS. 83 83 84 84 If you don't want to enable compression feature, say N. 85 + 86 + config EROFS_FS_ZIP_LZMA 87 + bool "EROFS LZMA compressed data support" 88 + depends on EROFS_FS_ZIP 89 + select XZ_DEC 90 + select XZ_DEC_MICROLZMA 91 + help 92 + Saying Y here includes support for reading EROFS file systems 93 + containing LZMA compressed data, specifically called microLZMA. It 94 + gives better compression ratios than the LZ4 algorithm, at the 95 + expense of more CPU overhead. 96 + 97 + LZMA support is an experimental feature for now and so most file 98 + systems will be readable without selecting this option. 99 + 100 + If unsure, say N.

+1

fs/erofs/Makefile

··· 4 4 erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o 5 5 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o 6 6 erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o 7 + erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o

+16

fs/erofs/compress.h

··· 20 20 bool inplace_io, partial_decoding; 21 21 }; 22 22 23 + struct z_erofs_decompressor { 24 + int (*decompress)(struct z_erofs_decompress_req *rq, 25 + struct list_head *pagepool); 26 + char *name; 27 + }; 28 + 23 29 /* some special page->private (unsigned long, see below) */ 24 30 #define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2) 25 31 #define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2) ··· 81 75 return true; 82 76 } 83 77 78 + #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) 79 + static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, 80 + struct page *page) 81 + { 82 + return page->mapping == MNGD_MAPPING(sbi); 83 + } 84 + 84 85 int z_erofs_decompress(struct z_erofs_decompress_req *rq, 85 86 struct list_head *pagepool); 86 87 88 + /* prototypes for specific algorithms */ 89 + int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, 90 + struct list_head *pagepool); 87 91 #endif

+6 -6

fs/erofs/decompressor.c

··· 16 16 #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) 17 17 #endif 18 18 19 - struct z_erofs_decompressor { 20 - int (*decompress)(struct z_erofs_decompress_req *rq, 21 - struct list_head *pagepool); 22 - char *name; 23 - }; 24 - 25 19 int z_erofs_load_lz4_config(struct super_block *sb, 26 20 struct erofs_super_block *dsb, 27 21 struct z_erofs_lz4_cfgs *lz4, int size) ··· 343 349 .decompress = z_erofs_lz4_decompress, 344 350 .name = "lz4" 345 351 }, 352 + #ifdef CONFIG_EROFS_FS_ZIP_LZMA 353 + [Z_EROFS_COMPRESSION_LZMA] = { 354 + .decompress = z_erofs_lzma_decompress, 355 + .name = "lzma" 356 + }, 357 + #endif 346 358 }; 347 359 348 360 int z_erofs_decompress(struct z_erofs_decompress_req *rq,

+290

fs/erofs/decompressor_lzma.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + #include <linux/xz.h> 3 + #include <linux/module.h> 4 + #include "compress.h" 5 + 6 + struct z_erofs_lzma { 7 + struct z_erofs_lzma *next; 8 + struct xz_dec_microlzma *state; 9 + struct xz_buf buf; 10 + u8 bounce[PAGE_SIZE]; 11 + }; 12 + 13 + /* considering the LZMA performance, no need to use a lockless list for now */ 14 + static DEFINE_SPINLOCK(z_erofs_lzma_lock); 15 + static unsigned int z_erofs_lzma_max_dictsize; 16 + static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms; 17 + static struct z_erofs_lzma *z_erofs_lzma_head; 18 + static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq); 19 + 20 + module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444); 21 + 22 + void z_erofs_lzma_exit(void) 23 + { 24 + /* there should be no running fs instance */ 25 + while (z_erofs_lzma_avail_strms) { 26 + struct z_erofs_lzma *strm; 27 + 28 + spin_lock(&z_erofs_lzma_lock); 29 + strm = z_erofs_lzma_head; 30 + if (!strm) { 31 + spin_unlock(&z_erofs_lzma_lock); 32 + DBG_BUGON(1); 33 + return; 34 + } 35 + z_erofs_lzma_head = NULL; 36 + spin_unlock(&z_erofs_lzma_lock); 37 + 38 + while (strm) { 39 + struct z_erofs_lzma *n = strm->next; 40 + 41 + if (strm->state) 42 + xz_dec_microlzma_end(strm->state); 43 + kfree(strm); 44 + --z_erofs_lzma_avail_strms; 45 + strm = n; 46 + } 47 + } 48 + } 49 + 50 + int z_erofs_lzma_init(void) 51 + { 52 + unsigned int i; 53 + 54 + /* by default, use # of possible CPUs instead */ 55 + if (!z_erofs_lzma_nstrms) 56 + z_erofs_lzma_nstrms = num_possible_cpus(); 57 + 58 + for (i = 0; i < z_erofs_lzma_nstrms; ++i) { 59 + struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL); 60 + 61 + if (!strm) { 62 + z_erofs_lzma_exit(); 63 + return -ENOMEM; 64 + } 65 + spin_lock(&z_erofs_lzma_lock); 66 + strm->next = z_erofs_lzma_head; 67 + z_erofs_lzma_head = strm; 68 + spin_unlock(&z_erofs_lzma_lock); 69 + ++z_erofs_lzma_avail_strms; 70 + } 71 + return 0; 72 + } 73 + 74 + int 
z_erofs_load_lzma_config(struct super_block *sb, 75 + struct erofs_super_block *dsb, 76 + struct z_erofs_lzma_cfgs *lzma, int size) 77 + { 78 + static DEFINE_MUTEX(lzma_resize_mutex); 79 + unsigned int dict_size, i; 80 + struct z_erofs_lzma *strm, *head = NULL; 81 + int err; 82 + 83 + if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) { 84 + erofs_err(sb, "invalid lzma cfgs, size=%u", size); 85 + return -EINVAL; 86 + } 87 + if (lzma->format) { 88 + erofs_err(sb, "unidentified lzma format %x, please check kernel version", 89 + le16_to_cpu(lzma->format)); 90 + return -EINVAL; 91 + } 92 + dict_size = le32_to_cpu(lzma->dict_size); 93 + if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) { 94 + erofs_err(sb, "unsupported lzma dictionary size %u", 95 + dict_size); 96 + return -EINVAL; 97 + } 98 + 99 + erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!"); 100 + 101 + /* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */ 102 + mutex_lock(&lzma_resize_mutex); 103 + 104 + if (z_erofs_lzma_max_dictsize >= dict_size) { 105 + mutex_unlock(&lzma_resize_mutex); 106 + return 0; 107 + } 108 + 109 + /* 1. collect/isolate all streams for the following check */ 110 + for (i = 0; i < z_erofs_lzma_avail_strms; ++i) { 111 + struct z_erofs_lzma *last; 112 + 113 + again: 114 + spin_lock(&z_erofs_lzma_lock); 115 + strm = z_erofs_lzma_head; 116 + if (!strm) { 117 + spin_unlock(&z_erofs_lzma_lock); 118 + wait_event(z_erofs_lzma_wq, 119 + READ_ONCE(z_erofs_lzma_head)); 120 + goto again; 121 + } 122 + z_erofs_lzma_head = NULL; 123 + spin_unlock(&z_erofs_lzma_lock); 124 + 125 + for (last = strm; last->next; last = last->next) 126 + ++i; 127 + last->next = head; 128 + head = strm; 129 + } 130 + 131 + err = 0; 132 + /* 2. 
walk each isolated stream and grow max dict_size if needed */ 133 + for (strm = head; strm; strm = strm->next) { 134 + if (strm->state) 135 + xz_dec_microlzma_end(strm->state); 136 + strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size); 137 + if (!strm->state) 138 + err = -ENOMEM; 139 + } 140 + 141 + /* 3. push back all to the global list and update max dict_size */ 142 + spin_lock(&z_erofs_lzma_lock); 143 + DBG_BUGON(z_erofs_lzma_head); 144 + z_erofs_lzma_head = head; 145 + spin_unlock(&z_erofs_lzma_lock); 146 + 147 + z_erofs_lzma_max_dictsize = dict_size; 148 + mutex_unlock(&lzma_resize_mutex); 149 + return err; 150 + } 151 + 152 + int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, 153 + struct list_head *pagepool) 154 + { 155 + const unsigned int nrpages_out = 156 + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; 157 + const unsigned int nrpages_in = 158 + PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; 159 + unsigned int inputmargin, inlen, outlen, pageofs; 160 + struct z_erofs_lzma *strm; 161 + u8 *kin; 162 + bool bounced = false; 163 + int no, ni, j, err = 0; 164 + 165 + /* 1. get the exact LZMA compressed size */ 166 + kin = kmap(*rq->in); 167 + inputmargin = 0; 168 + while (!kin[inputmargin & ~PAGE_MASK]) 169 + if (!(++inputmargin & ~PAGE_MASK)) 170 + break; 171 + 172 + if (inputmargin >= PAGE_SIZE) { 173 + kunmap(*rq->in); 174 + return -EFSCORRUPTED; 175 + } 176 + rq->inputsize -= inputmargin; 177 + 178 + /* 2. get an available lzma context */ 179 + again: 180 + spin_lock(&z_erofs_lzma_lock); 181 + strm = z_erofs_lzma_head; 182 + if (!strm) { 183 + spin_unlock(&z_erofs_lzma_lock); 184 + wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head)); 185 + goto again; 186 + } 187 + z_erofs_lzma_head = strm->next; 188 + spin_unlock(&z_erofs_lzma_lock); 189 + 190 + /* 3. 
multi-call decompress */ 191 + inlen = rq->inputsize; 192 + outlen = rq->outputsize; 193 + xz_dec_microlzma_reset(strm->state, inlen, outlen, 194 + !rq->partial_decoding); 195 + pageofs = rq->pageofs_out; 196 + strm->buf.in = kin + inputmargin; 197 + strm->buf.in_pos = 0; 198 + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin); 199 + inlen -= strm->buf.in_size; 200 + strm->buf.out = NULL; 201 + strm->buf.out_pos = 0; 202 + strm->buf.out_size = 0; 203 + 204 + for (ni = 0, no = -1;;) { 205 + enum xz_ret xz_err; 206 + 207 + if (strm->buf.out_pos == strm->buf.out_size) { 208 + if (strm->buf.out) { 209 + kunmap(rq->out[no]); 210 + strm->buf.out = NULL; 211 + } 212 + 213 + if (++no >= nrpages_out || !outlen) { 214 + erofs_err(rq->sb, "decompressed buf out of bound"); 215 + err = -EFSCORRUPTED; 216 + break; 217 + } 218 + strm->buf.out_pos = 0; 219 + strm->buf.out_size = min_t(u32, outlen, 220 + PAGE_SIZE - pageofs); 221 + outlen -= strm->buf.out_size; 222 + if (rq->out[no]) 223 + strm->buf.out = kmap(rq->out[no]) + pageofs; 224 + pageofs = 0; 225 + } else if (strm->buf.in_pos == strm->buf.in_size) { 226 + kunmap(rq->in[ni]); 227 + 228 + if (++ni >= nrpages_in || !inlen) { 229 + erofs_err(rq->sb, "compressed buf out of bound"); 230 + err = -EFSCORRUPTED; 231 + break; 232 + } 233 + strm->buf.in_pos = 0; 234 + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE); 235 + inlen -= strm->buf.in_size; 236 + kin = kmap(rq->in[ni]); 237 + strm->buf.in = kin; 238 + bounced = false; 239 + } 240 + 241 + /* 242 + * Handle overlapping: Use bounced buffer if the compressed 243 + * data is under processing; Otherwise, Use short-lived pages 244 + * from the on-stack pagepool where pages share with the same 245 + * request. 
246 + */ 247 + if (!bounced && rq->out[no] == rq->in[ni]) { 248 + memcpy(strm->bounce, strm->buf.in, strm->buf.in_size); 249 + strm->buf.in = strm->bounce; 250 + bounced = true; 251 + } 252 + for (j = ni + 1; j < nrpages_in; ++j) { 253 + struct page *tmppage; 254 + 255 + if (rq->out[no] != rq->in[j]) 256 + continue; 257 + 258 + DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), 259 + rq->in[j])); 260 + tmppage = erofs_allocpage(pagepool, 261 + GFP_KERNEL | __GFP_NOFAIL); 262 + set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); 263 + copy_highpage(tmppage, rq->in[j]); 264 + rq->in[j] = tmppage; 265 + } 266 + xz_err = xz_dec_microlzma_run(strm->state, &strm->buf); 267 + DBG_BUGON(strm->buf.out_pos > strm->buf.out_size); 268 + DBG_BUGON(strm->buf.in_pos > strm->buf.in_size); 269 + 270 + if (xz_err != XZ_OK) { 271 + if (xz_err == XZ_STREAM_END && !outlen) 272 + break; 273 + erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]", 274 + xz_err, rq->inputsize, rq->outputsize); 275 + err = -EFSCORRUPTED; 276 + break; 277 + } 278 + } 279 + if (no < nrpages_out && strm->buf.out) 280 + kunmap(rq->in[no]); 281 + if (ni < nrpages_in) 282 + kunmap(rq->in[ni]); 283 + /* 4. push back LZMA stream context to the global list */ 284 + spin_lock(&z_erofs_lzma_lock); 285 + strm->next = z_erofs_lzma_head; 286 + z_erofs_lzma_head = strm; 287 + spin_unlock(&z_erofs_lzma_lock); 288 + wake_up(&z_erofs_lzma_wq); 289 + return err; 290 + }

+12 -2

fs/erofs/erofs_fs.h

··· 264 264 265 265 /* available compression algorithm types (for h_algorithmtype) */ 266 266 enum { 267 - Z_EROFS_COMPRESSION_LZ4 = 0, 267 + Z_EROFS_COMPRESSION_LZ4 = 0, 268 + Z_EROFS_COMPRESSION_LZMA = 1, 268 269 Z_EROFS_COMPRESSION_MAX 269 270 }; 270 - #define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1)) 271 + #define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1) 271 272 272 273 /* 14 bytes (+ length field = 16 bytes) */ 273 274 struct z_erofs_lz4_cfgs { ··· 276 275 __le16 max_pclusterblks; 277 276 u8 reserved[10]; 278 277 } __packed; 278 + 279 + /* 14 bytes (+ length field = 16 bytes) */ 280 + struct z_erofs_lzma_cfgs { 281 + __le32 dict_size; 282 + __le16 format; 283 + u8 reserved[8]; 284 + } __packed; 285 + 286 + #define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) 279 287 280 288 /* 281 289 * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)

+22

fs/erofs/internal.h

··· 407 407 * approach instead if possible since it's more metadata lightweight.) 408 408 */ 409 409 #define EROFS_GET_BLOCKS_FIEMAP 0x0002 410 + /* Used to map the whole extent if non-negligible data is requested for LZMA */ 411 + #define EROFS_GET_BLOCKS_READMORE 0x0004 410 412 411 413 enum { 412 414 Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, ··· 533 531 { 534 532 if (lz4 || dsb->u1.lz4_max_distance) { 535 533 erofs_err(sb, "lz4 algorithm isn't enabled"); 534 + return -EINVAL; 535 + } 536 + return 0; 537 + } 538 + #endif /* !CONFIG_EROFS_FS_ZIP */ 539 + 540 + #ifdef CONFIG_EROFS_FS_ZIP_LZMA 541 + int z_erofs_lzma_init(void); 542 + void z_erofs_lzma_exit(void); 543 + int z_erofs_load_lzma_config(struct super_block *sb, 544 + struct erofs_super_block *dsb, 545 + struct z_erofs_lzma_cfgs *lzma, int size); 546 + #else 547 + static inline int z_erofs_lzma_init(void) { return 0; } 548 + static inline int z_erofs_lzma_exit(void) { return 0; } 549 + static inline int z_erofs_load_lzma_config(struct super_block *sb, 550 + struct erofs_super_block *dsb, 551 + struct z_erofs_lzma_cfgs *lzma, int size) { 552 + if (lzma) { 553 + erofs_err(sb, "lzma algorithm isn't enabled"); 536 554 return -EINVAL; 537 555 } 538 556 return 0;

+14 -3

fs/erofs/super.c

··· 225 225 case Z_EROFS_COMPRESSION_LZ4: 226 226 ret = z_erofs_load_lz4_config(sb, dsb, data, size); 227 227 break; 228 + case Z_EROFS_COMPRESSION_LZMA: 229 + ret = z_erofs_load_lzma_config(sb, dsb, data, size); 230 + break; 228 231 default: 229 232 DBG_BUGON(1); 230 233 ret = -EFAULT; ··· 843 840 if (err) 844 841 goto shrinker_err; 845 842 843 + err = z_erofs_lzma_init(); 844 + if (err) 845 + goto lzma_err; 846 + 846 847 erofs_pcpubuf_init(); 847 848 err = z_erofs_init_zip_subsystem(); 848 849 if (err) ··· 861 854 fs_err: 862 855 z_erofs_exit_zip_subsystem(); 863 856 zip_err: 857 + z_erofs_lzma_exit(); 858 + lzma_err: 864 859 erofs_exit_shrinker(); 865 860 shrinker_err: 866 861 kmem_cache_destroy(erofs_inode_cachep); ··· 873 864 static void __exit erofs_module_exit(void) 874 865 { 875 866 unregister_filesystem(&erofs_fs_type); 876 - z_erofs_exit_zip_subsystem(); 877 - erofs_exit_shrinker(); 878 867 879 - /* Ensure all RCU free inodes are safe before cache is destroyed. */ 868 + /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ 880 869 rcu_barrier(); 870 + 871 + z_erofs_exit_zip_subsystem(); 872 + z_erofs_lzma_exit(); 873 + erofs_exit_shrinker(); 881 874 kmem_cache_destroy(erofs_inode_cachep); 882 875 erofs_pcpubuf_exit(); 883 876 }

+2 -2

fs/erofs/zdata.c

··· 1404 1404 1405 1405 if (backmost) { 1406 1406 map->m_la = end; 1407 - /* TODO: pass in EROFS_GET_BLOCKS_READMORE for LZMA later */ 1408 - err = z_erofs_map_blocks_iter(inode, map, 0); 1407 + err = z_erofs_map_blocks_iter(inode, map, 1408 + EROFS_GET_BLOCKS_READMORE); 1409 1409 if (err) 1410 1410 return; 1411 1411

-7

fs/erofs/zdata.h

··· 94 94 } u; 95 95 }; 96 96 97 - #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) 98 - static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, 99 - struct page *page) 100 - { 101 - return page->mapping == MNGD_MAPPING(sbi); 102 - } 103 - 104 97 #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 105 98 #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) 106 99 #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)

+4 -1

fs/erofs/zmap.c

··· 672 672 else 673 673 map->m_algorithmformat = vi->z_algorithmtype[0]; 674 674 675 - if (flags & EROFS_GET_BLOCKS_FIEMAP) { 675 + if ((flags & EROFS_GET_BLOCKS_FIEMAP) || 676 + ((flags & EROFS_GET_BLOCKS_READMORE) && 677 + map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA && 678 + map->m_llen >= EROFS_BLKSIZ)) { 676 679 err = z_erofs_get_extent_decompressedlen(&m); 677 680 if (!err) 678 681 map->m_flags |= EROFS_MAP_FULL_MAPPED;