Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'erofs-for-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
"In this cycle, EROFS 48-bit block addressing is available to support
massive datasets for model training and other large data archive use
cases.

In addition, byte-oriented encoded extents are now supported to
reduce metadata sizes when using large configurations as well as to
improve Zstd compression speed.

There are some bugfixes and cleanups as usual.

Summary:

- Support 48-bit block addressing for large images

- Introduce encoded extents to reduce metadata on larger pclusters

- Enable unaligned compressed data to improve Zstd compression speed

- Allow 16-byte volume names again

- Minor cleanups"

* tag 'erofs-for-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: enable 48-bit layout support
erofs: support unaligned encoded data
erofs: implement encoded extent metadata
erofs: add encoded extent on-disk definition
erofs: initialize decompression early
erofs: support dot-omitted directories
erofs: implement 48-bit block addressing for unencoded inodes
erofs: add 48-bit block addressing on-disk support
erofs: simplify erofs_{read,fill}_inode()
erofs: get rid of erofs_map_blocks_flatmode()
erofs: move {in,out}pages into struct z_erofs_decompress_req
erofs: clean up header parsing for ztailpacking and fragments
erofs: simplify tail inline pcluster handling
erofs: allow 16-byte volume name again
erofs: get rid of erofs_kmap_type
erofs: use Z_EROFS_LCLUSTER_TYPE_MAX to simplify switches

+583 -575
+7 -7
fs/erofs/Kconfig
··· 13 13 smartphones with Android OS, LiveCDs and high-density hosts with 14 14 numerous containers; 15 15 16 - It also provides fixed-sized output compression support in order to 17 - improve storage density as well as keep relatively higher compression 18 - ratios and implements in-place decompression to reuse the file page 19 - for compressed data temporarily with proper strategies, which is 20 - quite useful to ensure guaranteed end-to-end runtime decompression 21 - performance under extremely memory pressure without extra cost. 16 + It also provides transparent compression and deduplication support to 17 + improve storage density and maintain relatively high compression 18 + ratios, and it implements in-place decompression to temporarily reuse 19 + page cache for compressed data using proper strategies, which is 20 + quite useful for ensuring guaranteed end-to-end runtime decompression 21 + performance under extreme memory pressure without extra cost. 22 22 23 23 See the documentation at <file:Documentation/filesystems/erofs.rst> 24 24 and the web pages at <https://erofs.docs.kernel.org> for more details. ··· 97 97 select LZ4_DECOMPRESS 98 98 default y 99 99 help 100 - Enable fixed-sized output compression for EROFS. 100 + Enable transparent compression support for EROFS file systems. 101 101 102 102 If you don't want to enable compression feature, say N. 103 103
+1 -1
fs/erofs/compress.h
··· 11 11 struct z_erofs_decompress_req { 12 12 struct super_block *sb; 13 13 struct page **in, **out; 14 + unsigned int inpages, outpages; 14 15 unsigned short pageofs_in, pageofs_out; 15 16 unsigned int inputsize, outputsize; 16 17 ··· 60 59 61 60 struct z_erofs_stream_dctx { 62 61 struct z_erofs_decompress_req *rq; 63 - unsigned int inpages, outpages; /* # of {en,de}coded pages */ 64 62 int no, ni; /* the current {en,de}coded page # */ 65 63 66 64 unsigned int avail_out; /* remaining bytes in the decoded buffer */
+64 -84
fs/erofs/data.c
··· 25 25 buf->page = NULL; 26 26 } 27 27 28 - void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, 29 - enum erofs_kmap_type type) 28 + void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap) 30 29 { 31 30 pgoff_t index = offset >> PAGE_SHIFT; 32 31 struct folio *folio = NULL; ··· 42 43 return folio; 43 44 } 44 45 buf->page = folio_file_page(folio, index); 45 - if (!buf->base && type == EROFS_KMAP) 46 - buf->base = kmap_local_page(buf->page); 47 - if (type == EROFS_NO_KMAP) 46 + if (!need_kmap) 48 47 return NULL; 48 + if (!buf->base) 49 + buf->base = kmap_local_page(buf->page); 49 50 return buf->base + (offset & ~PAGE_MASK); 50 51 } 51 52 ··· 64 65 } 65 66 66 67 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, 67 - erofs_off_t offset, enum erofs_kmap_type type) 68 + erofs_off_t offset, bool need_kmap) 68 69 { 69 70 erofs_init_metabuf(buf, sb); 70 - return erofs_bread(buf, offset, type); 71 - } 72 - 73 - static int erofs_map_blocks_flatmode(struct inode *inode, 74 - struct erofs_map_blocks *map) 75 - { 76 - struct erofs_inode *vi = EROFS_I(inode); 77 - struct super_block *sb = inode->i_sb; 78 - bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); 79 - erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking; 80 - 81 - map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */ 82 - if (map->m_la < erofs_pos(sb, lastblk)) { 83 - map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; 84 - map->m_plen = erofs_pos(sb, lastblk) - map->m_la; 85 - } else { 86 - DBG_BUGON(!tailendpacking); 87 - map->m_pa = erofs_iloc(inode) + vi->inode_isize + 88 - vi->xattr_isize + erofs_blkoff(sb, map->m_la); 89 - map->m_plen = inode->i_size - map->m_la; 90 - 91 - /* inline data should be located in the same meta block */ 92 - if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { 93 - erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); 94 - DBG_BUGON(1); 95 - return -EFSCORRUPTED; 96 - } 97 - 
map->m_flags |= EROFS_MAP_META; 98 - } 99 - return 0; 71 + return erofs_bread(buf, offset, need_kmap); 100 72 } 101 73 102 74 int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) 103 75 { 76 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 104 77 struct super_block *sb = inode->i_sb; 78 + unsigned int unit, blksz = sb->s_blocksize; 105 79 struct erofs_inode *vi = EROFS_I(inode); 106 80 struct erofs_inode_chunk_index *idx; 107 - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 108 - u64 chunknr; 109 - unsigned int unit; 81 + erofs_blk_t startblk, addrmask; 82 + bool tailpacking; 110 83 erofs_off_t pos; 111 - void *kaddr; 84 + u64 chunknr; 112 85 int err = 0; 113 86 114 87 trace_erofs_map_blocks_enter(inode, map, 0); 115 88 map->m_deviceid = 0; 116 - if (map->m_la >= inode->i_size) { 117 - /* leave out-of-bound access unmapped */ 118 - map->m_flags = 0; 119 - map->m_plen = map->m_llen; 89 + map->m_flags = 0; 90 + if (map->m_la >= inode->i_size) 120 91 goto out; 121 - } 122 92 123 93 if (vi->datalayout != EROFS_INODE_CHUNK_BASED) { 124 - err = erofs_map_blocks_flatmode(inode, map); 94 + tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); 95 + if (!tailpacking && vi->startblk == EROFS_NULL_ADDR) 96 + goto out; 97 + pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking); 98 + 99 + map->m_flags = EROFS_MAP_MAPPED; 100 + if (map->m_la < pos) { 101 + map->m_pa = erofs_pos(sb, vi->startblk) + map->m_la; 102 + map->m_llen = pos - map->m_la; 103 + } else { 104 + map->m_pa = erofs_iloc(inode) + vi->inode_isize + 105 + vi->xattr_isize + erofs_blkoff(sb, map->m_la); 106 + map->m_llen = inode->i_size - map->m_la; 107 + map->m_flags |= EROFS_MAP_META; 108 + } 125 109 goto out; 126 110 } 127 111 ··· 117 135 pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + 118 136 vi->xattr_isize, unit) + unit * chunknr; 119 137 120 - kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP); 121 - if (IS_ERR(kaddr)) { 122 - err = PTR_ERR(kaddr); 138 + idx = 
erofs_read_metabuf(&buf, sb, pos, true); 139 + if (IS_ERR(idx)) { 140 + err = PTR_ERR(idx); 123 141 goto out; 124 142 } 125 143 map->m_la = chunknr << vi->chunkbits; 126 - map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, 127 - round_up(inode->i_size - map->m_la, sb->s_blocksize)); 128 - 129 - /* handle block map */ 130 - if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { 131 - __le32 *blkaddr = kaddr; 132 - 133 - if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { 134 - map->m_flags = 0; 135 - } else { 136 - map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr)); 144 + map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits, 145 + round_up(inode->i_size - map->m_la, blksz)); 146 + if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) { 147 + addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ? 148 + BIT_ULL(48) - 1 : BIT_ULL(32) - 1; 149 + startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) | 150 + le32_to_cpu(idx->startblk_lo)) & addrmask; 151 + if ((startblk ^ EROFS_NULL_ADDR) & addrmask) { 152 + map->m_deviceid = le16_to_cpu(idx->device_id) & 153 + EROFS_SB(sb)->device_id_mask; 154 + map->m_pa = erofs_pos(sb, startblk); 137 155 map->m_flags = EROFS_MAP_MAPPED; 138 156 } 139 - goto out_unlock; 157 + } else { 158 + startblk = le32_to_cpu(*(__le32 *)idx); 159 + if (startblk != (u32)EROFS_NULL_ADDR) { 160 + map->m_pa = erofs_pos(sb, startblk); 161 + map->m_flags = EROFS_MAP_MAPPED; 162 + } 140 163 } 141 - /* parse chunk indexes */ 142 - idx = kaddr; 143 - switch (le32_to_cpu(idx->blkaddr)) { 144 - case EROFS_NULL_ADDR: 145 - map->m_flags = 0; 146 - break; 147 - default: 148 - map->m_deviceid = le16_to_cpu(idx->device_id) & 149 - EROFS_SB(sb)->device_id_mask; 150 - map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr)); 151 - map->m_flags = EROFS_MAP_MAPPED; 152 - break; 153 - } 154 - out_unlock: 155 164 erofs_put_metabuf(&buf); 156 165 out: 157 - if (!err) 158 - map->m_llen = map->m_plen; 166 + if (!err) { 167 + map->m_plen = map->m_llen; 168 + /* inline data should 
be located in the same meta block */ 169 + if ((map->m_flags & EROFS_MAP_META) && 170 + erofs_blkoff(sb, map->m_pa) + map->m_plen > blksz) { 171 + erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); 172 + DBG_BUGON(1); 173 + return -EFSCORRUPTED; 174 + } 175 + } 159 176 trace_erofs_map_blocks_exit(inode, map, 0, err); 160 177 return err; 161 178 } ··· 173 192 { 174 193 struct erofs_dev_context *devs = EROFS_SB(sb)->devs; 175 194 struct erofs_device_info *dif; 176 - erofs_off_t startoff, length; 195 + erofs_off_t startoff; 177 196 int id; 178 197 179 198 erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0); ··· 186 205 return -ENODEV; 187 206 } 188 207 if (devs->flatdev) { 189 - map->m_pa += erofs_pos(sb, dif->mapped_blkaddr); 208 + map->m_pa += erofs_pos(sb, dif->uniaddr); 190 209 up_read(&devs->rwsem); 191 210 return 0; 192 211 } ··· 195 214 } else if (devs->extra_devices && !devs->flatdev) { 196 215 down_read(&devs->rwsem); 197 216 idr_for_each_entry(&devs->tree, dif, id) { 198 - if (!dif->mapped_blkaddr) 217 + if (!dif->uniaddr) 199 218 continue; 200 219 201 - startoff = erofs_pos(sb, dif->mapped_blkaddr); 202 - length = erofs_pos(sb, dif->blocks); 220 + startoff = erofs_pos(sb, dif->uniaddr); 203 221 if (map->m_pa >= startoff && 204 - map->m_pa < startoff + length) { 222 + map->m_pa < startoff + erofs_pos(sb, dif->blocks)) { 205 223 map->m_pa -= startoff; 206 224 erofs_fill_from_devinfo(map, sb, dif); 207 225 break; ··· 292 312 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 293 313 294 314 iomap->type = IOMAP_INLINE; 295 - ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP); 315 + ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true); 296 316 if (IS_ERR(ptr)) 297 317 return PTR_ERR(ptr); 298 318 iomap->inline_data = ptr;
+36 -59
fs/erofs/decompressor.c
··· 9 9 10 10 #define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1) 11 11 12 - struct z_erofs_lz4_decompress_ctx { 13 - struct z_erofs_decompress_req *rq; 14 - /* # of encoded, decoded pages */ 15 - unsigned int inpages, outpages; 16 - /* decoded block total length (used for in-place decompression) */ 17 - unsigned int oend; 18 - }; 19 - 20 12 static int z_erofs_load_lz4_config(struct super_block *sb, 21 13 struct erofs_super_block *dsb, void *data, int size) 22 14 { ··· 47 55 * Fill all gaps with bounce pages if it's a sparse page list. Also check if 48 56 * all physical pages are consecutive, which can be seen for moderate CR. 49 57 */ 50 - static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, 58 + static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, 51 59 struct page **pagepool) 52 60 { 53 - struct z_erofs_decompress_req *rq = ctx->rq; 54 61 struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; 55 62 unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, 56 63 BITS_PER_LONG)] = { 0 }; ··· 59 68 unsigned int i, j, top; 60 69 61 70 top = 0; 62 - for (i = j = 0; i < ctx->outpages; ++i, ++j) { 71 + for (i = j = 0; i < rq->outpages; ++i, ++j) { 63 72 struct page *const page = rq->out[i]; 64 73 struct page *victim; 65 74 ··· 105 114 return kaddr ? 
1 : 0; 106 115 } 107 116 108 - static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, 117 + static void *z_erofs_lz4_handle_overlap(struct z_erofs_decompress_req *rq, 109 118 void *inpage, void *out, unsigned int *inputmargin, 110 119 int *maptype, bool may_inplace) 111 120 { 112 - struct z_erofs_decompress_req *rq = ctx->rq; 113 - unsigned int omargin, total, i; 121 + unsigned int oend, omargin, total, i; 114 122 struct page **in; 115 123 void *src, *tmp; 116 124 117 125 if (rq->inplace_io) { 118 - omargin = PAGE_ALIGN(ctx->oend) - ctx->oend; 126 + oend = rq->pageofs_out + rq->outputsize; 127 + omargin = PAGE_ALIGN(oend) - oend; 119 128 if (rq->partial_decoding || !may_inplace || 120 129 omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) 121 130 goto docopy; 122 131 123 - for (i = 0; i < ctx->inpages; ++i) 124 - if (rq->out[ctx->outpages - ctx->inpages + i] != 132 + for (i = 0; i < rq->inpages; ++i) 133 + if (rq->out[rq->outpages - rq->inpages + i] != 125 134 rq->in[i]) 126 135 goto docopy; 127 136 kunmap_local(inpage); 128 137 *maptype = 3; 129 - return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT); 138 + return out + ((rq->outpages - rq->inpages) << PAGE_SHIFT); 130 139 } 131 140 132 - if (ctx->inpages <= 1) { 141 + if (rq->inpages <= 1) { 133 142 *maptype = 0; 134 143 return inpage; 135 144 } 136 145 kunmap_local(inpage); 137 - src = erofs_vm_map_ram(rq->in, ctx->inpages); 146 + src = erofs_vm_map_ram(rq->in, rq->inpages); 138 147 if (!src) 139 148 return ERR_PTR(-ENOMEM); 140 149 *maptype = 1; ··· 143 152 docopy: 144 153 /* Or copy compressed data which can be overlapped to per-CPU buffer */ 145 154 in = rq->in; 146 - src = z_erofs_get_gbuf(ctx->inpages); 155 + src = z_erofs_get_gbuf(rq->inpages); 147 156 if (!src) { 148 157 DBG_BUGON(1); 149 158 kunmap_local(inpage); ··· 188 197 return 0; 189 198 } 190 199 191 - static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, 192 - u8 *dst) 200 + static int 
z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, u8 *dst) 193 201 { 194 - struct z_erofs_decompress_req *rq = ctx->rq; 195 202 bool support_0padding = false, may_inplace = false; 196 203 unsigned int inputmargin; 197 204 u8 *out, *headpage, *src; ··· 213 224 } 214 225 215 226 inputmargin = rq->pageofs_in; 216 - src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin, 227 + src = z_erofs_lz4_handle_overlap(rq, headpage, dst, &inputmargin, 217 228 &maptype, may_inplace); 218 229 if (IS_ERR(src)) 219 230 return PTR_ERR(src); ··· 240 251 if (maptype == 0) { 241 252 kunmap_local(headpage); 242 253 } else if (maptype == 1) { 243 - vm_unmap_ram(src, ctx->inpages); 254 + vm_unmap_ram(src, rq->inpages); 244 255 } else if (maptype == 2) { 245 256 z_erofs_put_gbuf(src); 246 257 } else if (maptype != 3) { ··· 253 264 static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, 254 265 struct page **pagepool) 255 266 { 256 - struct z_erofs_lz4_decompress_ctx ctx; 257 267 unsigned int dst_maptype; 258 268 void *dst; 259 269 int ret; 260 270 261 - ctx.rq = rq; 262 - ctx.oend = rq->pageofs_out + rq->outputsize; 263 - ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT; 264 - ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; 265 - 266 271 /* one optimized fast path only for non bigpcluster cases yet */ 267 - if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { 272 + if (rq->inpages == 1 && rq->outpages == 1 && !rq->inplace_io) { 268 273 DBG_BUGON(!*rq->out); 269 274 dst = kmap_local_page(*rq->out); 270 275 dst_maptype = 0; 271 - goto dstmap_out; 272 - } 273 - 274 - /* general decoding path which can be used for all cases */ 275 - ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool); 276 - if (ret < 0) { 277 - return ret; 278 - } else if (ret > 0) { 279 - dst = page_address(*rq->out); 280 - dst_maptype = 1; 281 276 } else { 282 - dst = erofs_vm_map_ram(rq->out, ctx.outpages); 283 - if (!dst) 284 - return -ENOMEM; 285 - dst_maptype = 2; 277 
+ /* general decoding path which can be used for all cases */ 278 + ret = z_erofs_lz4_prepare_dstpages(rq, pagepool); 279 + if (ret < 0) 280 + return ret; 281 + if (ret > 0) { 282 + dst = page_address(*rq->out); 283 + dst_maptype = 1; 284 + } else { 285 + dst = erofs_vm_map_ram(rq->out, rq->outpages); 286 + if (!dst) 287 + return -ENOMEM; 288 + dst_maptype = 2; 289 + } 286 290 } 287 - 288 - dstmap_out: 289 - ret = z_erofs_lz4_decompress_mem(&ctx, dst); 291 + ret = z_erofs_lz4_decompress_mem(rq, dst); 290 292 if (!dst_maptype) 291 293 kunmap_local(dst); 292 294 else if (dst_maptype == 2) 293 - vm_unmap_ram(dst, ctx.outpages); 295 + vm_unmap_ram(dst, rq->outpages); 294 296 return ret; 295 297 } 296 298 297 299 static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, 298 300 struct page **pagepool) 299 301 { 300 - const unsigned int nrpages_in = 301 - PAGE_ALIGN(rq->pageofs_in + rq->inputsize) >> PAGE_SHIFT; 302 - const unsigned int nrpages_out = 303 - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; 302 + const unsigned int nrpages_in = rq->inpages, nrpages_out = rq->outpages; 304 303 const unsigned int bs = rq->sb->s_blocksize; 305 304 unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt; 306 305 u8 *kin; ··· 313 336 rq->outputsize -= cur; 314 337 } 315 338 316 - for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) { 339 + for (; rq->outputsize; rq->pageofs_in = 0, cur += insz, ni++) { 317 340 insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize); 318 341 rq->outputsize -= insz; 319 342 if (!rq->in[ni]) ··· 350 373 unsigned int j; 351 374 352 375 if (!dctx->avail_out) { 353 - if (++dctx->no >= dctx->outpages || !rq->outputsize) { 376 + if (++dctx->no >= rq->outpages || !rq->outputsize) { 354 377 erofs_err(sb, "insufficient space for decompressed data"); 355 378 return -EFSCORRUPTED; 356 379 } ··· 378 401 } 379 402 380 403 if (dctx->inbuf_pos == dctx->inbuf_sz && rq->inputsize) { 381 - if (++dctx->ni >= dctx->inpages) { 404 + if 
(++dctx->ni >= rq->inpages) { 382 405 erofs_err(sb, "invalid compressed data"); 383 406 return -EFSCORRUPTED; 384 407 } ··· 411 434 dctx->bounced = true; 412 435 } 413 436 414 - for (j = dctx->ni + 1; j < dctx->inpages; ++j) { 437 + for (j = dctx->ni + 1; j < rq->inpages; ++j) { 415 438 if (rq->out[dctx->no] != rq->in[j]) 416 439 continue; 417 440 tmppage = erofs_allocpage(pgpl, rq->gfp);
+1 -7
fs/erofs/decompressor_deflate.c
··· 101 101 struct page **pgpl) 102 102 { 103 103 struct super_block *sb = rq->sb; 104 - struct z_erofs_stream_dctx dctx = { 105 - .rq = rq, 106 - .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, 107 - .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) 108 - >> PAGE_SHIFT, 109 - .no = -1, .ni = 0, 110 - }; 104 + struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; 111 105 struct z_erofs_deflate *strm; 112 106 int zerr, err; 113 107
+1 -7
fs/erofs/decompressor_lzma.c
··· 150 150 struct page **pgpl) 151 151 { 152 152 struct super_block *sb = rq->sb; 153 - struct z_erofs_stream_dctx dctx = { 154 - .rq = rq, 155 - .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, 156 - .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) 157 - >> PAGE_SHIFT, 158 - .no = -1, .ni = 0, 159 - }; 153 + struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; 160 154 struct xz_buf buf = {}; 161 155 struct z_erofs_lzma *strm; 162 156 enum xz_ret xz_err;
+1 -7
fs/erofs/decompressor_zstd.c
··· 139 139 struct page **pgpl) 140 140 { 141 141 struct super_block *sb = rq->sb; 142 - struct z_erofs_stream_dctx dctx = { 143 - .rq = rq, 144 - .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, 145 - .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) 146 - >> PAGE_SHIFT, 147 - .no = -1, .ni = 0, 148 - }; 142 + struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; 149 143 zstd_in_buffer in_buf = { NULL, 0, 0 }; 150 144 zstd_out_buffer out_buf = { NULL, 0, 0 }; 151 145 struct z_erofs_zstd *strm;
+7 -2
fs/erofs/dir.c
··· 58 58 struct erofs_dirent *de; 59 59 unsigned int nameoff, maxsize; 60 60 61 - de = erofs_bread(&buf, dbstart, EROFS_KMAP); 61 + de = erofs_bread(&buf, dbstart, true); 62 62 if (IS_ERR(de)) { 63 - erofs_err(sb, "fail to readdir of logical block %u of nid %llu", 63 + erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", 64 64 erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); 65 65 err = PTR_ERR(de); 66 66 break; ··· 90 90 ofs = 0; 91 91 } 92 92 erofs_put_metabuf(&buf); 93 + if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { 94 + if (!dir_emit_dot(f, ctx)) 95 + return 0; 96 + ++ctx->pos; 97 + } 93 98 return err < 0 ? err : 0; 94 99 } 95 100
+88 -103
fs/erofs/erofs_fs.h
··· 30 30 #define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 31 31 #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 32 32 #define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 33 + #define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080 33 34 #define EROFS_ALL_FEATURE_INCOMPAT \ 34 - (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ 35 - EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ 36 - EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ 37 - EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ 38 - EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ 39 - EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ 40 - EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ 41 - EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ 42 - EROFS_FEATURE_INCOMPAT_DEDUPE | \ 43 - EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES) 35 + ((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1) 44 36 45 37 #define EROFS_SB_EXTSLOT_SIZE 16 46 38 47 39 struct erofs_deviceslot { 48 40 u8 tag[64]; /* digest(sha256), etc. */ 49 - __le32 blocks; /* total fs blocks of this device */ 50 - __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ 51 - u8 reserved[56]; 41 + __le32 blocks_lo; /* total blocks count of this device */ 42 + __le32 uniaddr_lo; /* unified starting block of this device */ 43 + __le32 blocks_hi; /* total blocks count MSB */ 44 + __le16 uniaddr_hi; /* unified starting block MSB */ 45 + u8 reserved[50]; 52 46 }; 53 47 #define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) 54 48 ··· 53 59 __le32 feature_compat; 54 60 __u8 blkszbits; /* filesystem block size in bit shift */ 55 61 __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ 56 - 57 - __le16 root_nid; /* nid of root directory */ 62 + union { 63 + __le16 rootnid_2b; /* nid of root directory */ 64 + __le16 blocks_hi; /* (48BIT on) blocks count MSB */ 65 + } rb; 58 66 __le64 inos; /* total valid ino # (== f_files - f_favail) */ 59 - 60 - __le64 build_time; /* compact inode time derivation */ 61 - __le32 build_time_nsec; /* compact inode time derivation in ns scale */ 62 - __le32 blocks; /* used for statfs */ 67 + __le64 
epoch; /* base seconds used for compact inodes */ 68 + __le32 fixed_nsec; /* fixed nanoseconds for compact inodes */ 69 + __le32 blocks_lo; /* blocks count LSB */ 63 70 __le32 meta_blkaddr; /* start block address of metadata area */ 64 71 __le32 xattr_blkaddr; /* start block address of shared xattr area */ 65 72 __u8 uuid[16]; /* 128-bit uuid for volume */ ··· 79 84 __le32 xattr_prefix_start; /* start of long xattr prefixes */ 80 85 __le64 packed_nid; /* nid of the special packed inode */ 81 86 __u8 xattr_filter_reserved; /* reserved for xattr name filter */ 82 - __u8 reserved2[23]; 87 + __u8 reserved[3]; 88 + __le32 build_time; /* seconds added to epoch for mkfs time */ 89 + __le64 rootnid_8b; /* (48BIT on) nid of root directory */ 90 + __u8 reserved2[8]; 83 91 }; 84 92 85 93 /* ··· 113 115 #define EROFS_I_VERSION_MASK 0x01 114 116 #define EROFS_I_DATALAYOUT_MASK 0x07 115 117 116 - #define EROFS_I_VERSION_BIT 0 117 - #define EROFS_I_DATALAYOUT_BIT 1 118 - #define EROFS_I_ALL_BIT 4 119 - 120 - #define EROFS_I_ALL ((1 << EROFS_I_ALL_BIT) - 1) 118 + #define EROFS_I_VERSION_BIT 0 119 + #define EROFS_I_DATALAYOUT_BIT 1 120 + #define EROFS_I_NLINK_1_BIT 4 /* non-directory compact inodes only */ 121 + #define EROFS_I_DOT_OMITTED_BIT 4 /* (directories) omit the `.` dirent */ 122 + #define EROFS_I_ALL ((1 << (EROFS_I_NLINK_1_BIT + 1)) - 1) 121 123 122 124 /* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ 123 125 #define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F 124 - /* with chunk indexes or just a 4-byte blkaddr array */ 126 + /* with chunk indexes or just a 4-byte block array */ 125 127 #define EROFS_CHUNK_FORMAT_INDEXES 0x0020 128 + #define EROFS_CHUNK_FORMAT_48BIT 0x0040 126 129 127 - #define EROFS_CHUNK_FORMAT_ALL \ 128 - (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) 130 + #define EROFS_CHUNK_FORMAT_ALL ((EROFS_CHUNK_FORMAT_48BIT << 1) - 1) 129 131 130 132 /* 32-byte on-disk inode */ 131 133 #define EROFS_INODE_LAYOUT_COMPACT 0 
··· 138 140 }; 139 141 140 142 union erofs_inode_i_u { 141 - /* total compressed blocks for compressed inodes */ 142 - __le32 compressed_blocks; 143 - 144 - /* block address for uncompressed flat inodes */ 145 - __le32 raw_blkaddr; 146 - 147 - /* for device files, used to indicate old/new device # */ 148 - __le32 rdev; 149 - 150 - /* for chunk-based files, it contains the summary info */ 143 + __le32 blocks_lo; /* total blocks count (if compressed inodes) */ 144 + __le32 startblk_lo; /* starting block number (if flat inodes) */ 145 + __le32 rdev; /* device ID (if special inodes) */ 151 146 struct erofs_inode_chunk_info c; 147 + }; 148 + 149 + union erofs_inode_i_nb { 150 + __le16 nlink; /* if EROFS_I_NLINK_1_BIT is unset */ 151 + __le16 blocks_hi; /* total blocks count MSB */ 152 + __le16 startblk_hi; /* starting block number MSB */ 152 153 }; 153 154 154 155 /* 32-byte reduced form of an ondisk inode */ 155 156 struct erofs_inode_compact { 156 157 __le16 i_format; /* inode format hints */ 157 - 158 - /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ 159 158 __le16 i_xattr_icount; 160 159 __le16 i_mode; 161 - __le16 i_nlink; 160 + union erofs_inode_i_nb i_nb; 162 161 __le32 i_size; 163 - __le32 i_reserved; 162 + __le32 i_mtime; 164 163 union erofs_inode_i_u i_u; 165 164 166 165 __le32 i_ino; /* only used for 32-bit stat compatibility */ 167 166 __le16 i_uid; 168 167 __le16 i_gid; 169 - __le32 i_reserved2; 168 + __le32 i_reserved; 170 169 }; 171 170 172 171 /* 64-byte complete form of an ondisk inode */ 173 172 struct erofs_inode_extended { 174 173 __le16 i_format; /* inode format hints */ 175 - 176 - /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ 177 174 __le16 i_xattr_icount; 178 175 __le16 i_mode; 179 - __le16 i_reserved; 176 + union erofs_inode_i_nb i_nb; 180 177 __le64 i_size; 181 178 union erofs_inode_i_u i_u; 182 179 ··· 241 248 if (!i_xattr_icount) 242 249 return 0; 243 250 251 + /* 1 header + n-1 * 4 bytes inline xattr to keep 
continuity */ 244 252 return sizeof(struct erofs_xattr_ibody_header) + 245 253 sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1); 246 254 } ··· 260 266 /* 4-byte block address array */ 261 267 #define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32) 262 268 263 - /* 8-byte inode chunk indexes */ 269 + /* 8-byte inode chunk index */ 264 270 struct erofs_inode_chunk_index { 265 - __le16 advise; /* always 0, don't care for now */ 271 + __le16 startblk_hi; /* starting block number MSB */ 266 272 __le16 device_id; /* back-end storage id (with bits masked) */ 267 - __le32 blkaddr; /* start block address of this inode chunk */ 273 + __le32 startblk_lo; /* starting block number of this chunk */ 268 274 }; 269 275 270 276 /* dirent sorts in alphabet order, thus we can do binary search */ ··· 331 337 #define Z_EROFS_ZSTD_MAX_DICT_SIZE Z_EROFS_PCLUSTER_MAX_SIZE 332 338 333 339 /* 334 - * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) 335 - * e.g. for 4k logical cluster size, 4B if compacted 2B is off; 336 - * (4B) + 2B + (4B) if compacted 2B is on. 
337 - * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) 338 - * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) 339 - * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) 340 - * bit 4 : interlaced plain pcluster (0 - off; 1 - on) 341 - * bit 5 : fragment pcluster (0 - off; 1 - on) 340 + * Enable COMPACTED_2B for EROFS_INODE_COMPRESSED_COMPACT inodes: 341 + * 4B (disabled) vs 4B+2B+4B (enabled) 342 342 */ 343 343 #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 344 + /* Enable extent metadata for EROFS_INODE_COMPRESSED_FULL inodes */ 345 + #define Z_EROFS_ADVISE_EXTENTS 0x0001 344 346 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 345 347 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 346 348 #define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 347 349 #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 348 350 #define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 351 + /* Indicate the record size for each extent if extent metadata is used */ 352 + #define Z_EROFS_ADVISE_EXTRECSZ_BIT 1 353 + #define Z_EROFS_ADVISE_EXTRECSZ_MASK 0x3 349 354 350 355 #define Z_EROFS_FRAGMENT_INODE_BIT 7 351 356 struct z_erofs_map_header { ··· 356 363 /* indicates the encoded size of tailpacking data */ 357 364 __le16 h_idata_size; 358 365 }; 366 + __le32 h_extents_lo; /* extent count LSB */ 359 367 }; 360 368 __le16 h_advise; 361 - /* 362 - * bit 0-3 : algorithm type of head 1 (logical cluster type 01); 363 - * bit 4-7 : algorithm type of head 2 (logical cluster type 11). 364 - */ 365 - __u8 h_algorithmtype; 366 - /* 367 - * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; 368 - * bit 3-6 : reserved; 369 - * bit 7 : move the whole file into packed inode or not. 
370 - */ 371 - __u8 h_clusterbits; 369 + union { 370 + struct { 371 + /* algorithm type (bit 0-3: HEAD1; bit 4-7: HEAD2) */ 372 + __u8 h_algorithmtype; 373 + /* 374 + * bit 0-3 : logical cluster bits - blkszbits 375 + * bit 4-6 : reserved 376 + * bit 7 : pack the whole file into packed inode 377 + */ 378 + __u8 h_clusterbits; 379 + }; 380 + __le16 h_extents_hi; /* extent count MSB */ 381 + }; 372 382 }; 373 383 374 - /* 375 - * On-disk logical cluster type: 376 - * 0 - literal (uncompressed) lcluster 377 - * 1,3 - compressed lcluster (for HEAD lclusters) 378 - * 2 - compressed lcluster (for NONHEAD lclusters) 379 - * 380 - * In detail, 381 - * 0 - literal (uncompressed) lcluster, 382 - * di_advise = 0 383 - * di_clusterofs = the literal data offset of the lcluster 384 - * di_blkaddr = the blkaddr of the literal pcluster 385 - * 386 - * 1,3 - compressed lcluster (for HEAD lclusters) 387 - * di_advise = 1 or 3 388 - * di_clusterofs = the decompressed data offset of the lcluster 389 - * di_blkaddr = the blkaddr of the compressed pcluster 390 - * 391 - * 2 - compressed lcluster (for NONHEAD lclusters) 392 - * di_advise = 2 393 - * di_clusterofs = 394 - * the decompressed data offset in its own HEAD lcluster 395 - * di_u.delta[0] = distance to this HEAD lcluster 396 - * di_u.delta[1] = distance to the next HEAD lcluster 397 - */ 398 384 enum { 399 385 Z_EROFS_LCLUSTER_TYPE_PLAIN = 0, 400 386 Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1, ··· 387 415 /* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ 388 416 #define Z_EROFS_LI_PARTIAL_REF (1 << 15) 389 417 390 - /* 391 - * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the 392 - * compressed block count of a compressed extent (in logical clusters, aka. 393 - * block count of a pcluster). 
394 - */ 418 + /* Set on 1st non-head lcluster to store compressed block count (in blocks) */
+1 -1
fs/erofs/fileio.c
··· 112 112 void *src; 113 113 114 114 src = erofs_read_metabuf(&buf, inode->i_sb, 115 - map->m_pa + ofs, EROFS_KMAP); 115 + map->m_pa + ofs, true); 116 116 if (IS_ERR(src)) { 117 117 err = PTR_ERR(src); 118 118 break;
+1 -1
fs/erofs/fscache.c
··· 276 276 size_t size = map.m_llen; 277 277 void *src; 278 278 279 - src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP); 279 + src = erofs_read_metabuf(&buf, sb, map.m_pa, true); 280 280 if (IS_ERR(src)) 281 281 return PTR_ERR(src); 282 282
+62 -67
fs/erofs/inode.c
··· 27 27 static int erofs_read_inode(struct inode *inode) 28 28 { 29 29 struct super_block *sb = inode->i_sb; 30 - struct erofs_sb_info *sbi = EROFS_SB(sb); 31 - struct erofs_inode *vi = EROFS_I(inode); 32 - const erofs_off_t inode_loc = erofs_iloc(inode); 33 - erofs_blk_t blkaddr, nblks = 0; 34 - void *kaddr; 35 - struct erofs_inode_compact *dic; 36 - struct erofs_inode_extended *die, *copied = NULL; 37 - union erofs_inode_i_u iu; 30 + erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode)); 31 + unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode)); 38 32 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 39 - unsigned int ifmt, ofs; 33 + struct erofs_sb_info *sbi = EROFS_SB(sb); 34 + erofs_blk_t addrmask = BIT_ULL(48) - 1; 35 + struct erofs_inode *vi = EROFS_I(inode); 36 + struct erofs_inode_extended *die, copied; 37 + struct erofs_inode_compact *dic; 38 + unsigned int ifmt; 39 + void *ptr; 40 40 int err = 0; 41 41 42 - blkaddr = erofs_blknr(sb, inode_loc); 43 - ofs = erofs_blkoff(sb, inode_loc); 44 - 45 - kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), EROFS_KMAP); 46 - if (IS_ERR(kaddr)) { 47 - erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", 48 - vi->nid, PTR_ERR(kaddr)); 49 - return PTR_ERR(kaddr); 42 + ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true); 43 + if (IS_ERR(ptr)) { 44 + err = PTR_ERR(ptr); 45 + erofs_err(sb, "failed to get inode (nid: %llu) page, err %d", 46 + vi->nid, err); 47 + goto err_out; 50 48 } 51 49 52 - dic = kaddr + ofs; 50 + dic = ptr + ofs; 53 51 ifmt = le16_to_cpu(dic->i_format); 54 52 if (ifmt & ~EROFS_I_ALL) { 55 53 erofs_err(sb, "unsupported i_format %u of nid %llu", ··· 71 73 if (ofs + vi->inode_isize <= sb->s_blocksize) { 72 74 ofs += vi->inode_isize; 73 75 die = (struct erofs_inode_extended *)dic; 76 + copied.i_u = die->i_u; 77 + copied.i_nb = die->i_nb; 74 78 } else { 75 79 const unsigned int gotten = sb->s_blocksize - ofs; 76 80 77 - copied = kmalloc(vi->inode_isize, GFP_KERNEL); 78 - 
if (!copied) { 79 - err = -ENOMEM; 81 + memcpy(&copied, dic, gotten); 82 + ptr = erofs_read_metabuf(&buf, sb, 83 + erofs_pos(sb, blkaddr + 1), true); 84 + if (IS_ERR(ptr)) { 85 + err = PTR_ERR(ptr); 86 + erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d", 87 + vi->nid, err); 80 88 goto err_out; 81 89 } 82 - memcpy(copied, dic, gotten); 83 - kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr + 1), 84 - EROFS_KMAP); 85 - if (IS_ERR(kaddr)) { 86 - erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld", 87 - vi->nid, PTR_ERR(kaddr)); 88 - kfree(copied); 89 - return PTR_ERR(kaddr); 90 - } 91 90 ofs = vi->inode_isize - gotten; 92 - memcpy((u8 *)copied + gotten, kaddr, ofs); 93 - die = copied; 91 + memcpy((u8 *)&copied + gotten, ptr, ofs); 92 + die = &copied; 94 93 } 95 94 vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); 96 95 97 96 inode->i_mode = le16_to_cpu(die->i_mode); 98 - iu = die->i_u; 99 97 i_uid_write(inode, le32_to_cpu(die->i_uid)); 100 98 i_gid_write(inode, le32_to_cpu(die->i_gid)); 101 99 set_nlink(inode, le32_to_cpu(die->i_nlink)); 102 - /* each extended inode has its own timestamp */ 103 - inode_set_ctime(inode, le64_to_cpu(die->i_mtime), 100 + inode_set_mtime(inode, le64_to_cpu(die->i_mtime), 104 101 le32_to_cpu(die->i_mtime_nsec)); 105 102 106 103 inode->i_size = le64_to_cpu(die->i_size); 107 - kfree(copied); 108 104 break; 109 105 case EROFS_INODE_LAYOUT_COMPACT: 110 106 vi->inode_isize = sizeof(struct erofs_inode_compact); ··· 106 114 vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); 107 115 108 116 inode->i_mode = le16_to_cpu(dic->i_mode); 109 - iu = dic->i_u; 117 + copied.i_u = dic->i_u; 110 118 i_uid_write(inode, le16_to_cpu(dic->i_uid)); 111 119 i_gid_write(inode, le16_to_cpu(dic->i_gid)); 112 - set_nlink(inode, le16_to_cpu(dic->i_nlink)); 113 - /* use build time for compact inodes */ 114 - inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec); 120 + if 
(!S_ISDIR(inode->i_mode) && 121 + ((ifmt >> EROFS_I_NLINK_1_BIT) & 1)) { 122 + set_nlink(inode, 1); 123 + copied.i_nb = dic->i_nb; 124 + } else { 125 + set_nlink(inode, le16_to_cpu(dic->i_nb.nlink)); 126 + copied.i_nb.startblk_hi = 0; 127 + addrmask = BIT_ULL(32) - 1; 128 + } 129 + inode_set_mtime(inode, sbi->epoch + le32_to_cpu(dic->i_mtime), 130 + sbi->fixed_nsec); 115 131 116 132 inode->i_size = le32_to_cpu(dic->i_size); 117 133 break; ··· 136 136 goto err_out; 137 137 } 138 138 switch (inode->i_mode & S_IFMT) { 139 - case S_IFREG: 140 139 case S_IFDIR: 140 + vi->dot_omitted = (ifmt >> EROFS_I_DOT_OMITTED_BIT) & 1; 141 + fallthrough; 142 + case S_IFREG: 141 143 case S_IFLNK: 142 - vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr); 144 + vi->startblk = le32_to_cpu(copied.i_u.startblk_lo) | 145 + ((u64)le16_to_cpu(copied.i_nb.startblk_hi) << 32); 146 + if (vi->datalayout == EROFS_INODE_FLAT_PLAIN && 147 + !((vi->startblk ^ EROFS_NULL_ADDR) & addrmask)) 148 + vi->startblk = EROFS_NULL_ADDR; 149 + 143 150 if(S_ISLNK(inode->i_mode)) { 144 - err = erofs_fill_symlink(inode, kaddr, ofs); 151 + err = erofs_fill_symlink(inode, ptr, ofs); 145 152 if (err) 146 153 goto err_out; 147 154 } 148 155 break; 149 156 case S_IFCHR: 150 157 case S_IFBLK: 151 - inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev)); 158 + inode->i_rdev = new_decode_dev(le32_to_cpu(copied.i_u.rdev)); 152 159 break; 153 160 case S_IFIFO: 154 161 case S_IFSOCK: ··· 168 161 goto err_out; 169 162 } 170 163 171 - /* total blocks for compressed files */ 172 - if (erofs_inode_is_data_compressed(vi->datalayout)) { 173 - nblks = le32_to_cpu(iu.compressed_blocks); 174 - } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { 164 + if (erofs_inode_is_data_compressed(vi->datalayout)) 165 + inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) << 166 + (sb->s_blocksize_bits - 9); 167 + else 168 + inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; 169 + 170 + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { 
175 171 /* fill chunked inode summary info */ 176 - vi->chunkformat = le16_to_cpu(iu.c.format); 172 + vi->chunkformat = le16_to_cpu(copied.i_u.c.format); 177 173 if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { 178 174 erofs_err(sb, "unsupported chunk format %x of nid %llu", 179 175 vi->chunkformat, vi->nid); ··· 186 176 vi->chunkbits = sb->s_blocksize_bits + 187 177 (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); 188 178 } 189 - inode_set_mtime_to_ts(inode, 190 - inode_set_atime_to_ts(inode, inode_get_ctime(inode))); 179 + inode_set_atime_to_ts(inode, 180 + inode_set_ctime_to_ts(inode, inode_get_mtime(inode))); 191 181 192 182 inode->i_flags &= ~S_DAX; 193 183 if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && 194 184 (vi->datalayout == EROFS_INODE_FLAT_PLAIN || 195 185 vi->datalayout == EROFS_INODE_CHUNK_BASED)) 196 186 inode->i_flags |= S_DAX; 197 - 198 - if (!nblks) 199 - /* measure inode.i_blocks as generic filesystems */ 200 - inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; 201 - else 202 - inode->i_blocks = nblks << (sb->s_blocksize_bits - 9); 203 187 err_out: 204 - DBG_BUGON(err); 205 188 erofs_put_metabuf(&buf); 206 189 return err; 207 190 } ··· 205 202 int err; 206 203 207 204 trace_erofs_fill_inode(inode); 208 - 209 - /* read inode base data from disk */ 210 205 err = erofs_read_inode(inode); 211 206 if (err) 212 207 return err; 213 208 214 - /* setup the new inode */ 215 209 switch (inode->i_mode & S_IFMT) { 216 210 case S_IFREG: 217 211 inode->i_op = &erofs_generic_iops; ··· 229 229 inode->i_op = &erofs_symlink_iops; 230 230 inode_nohighmem(inode); 231 231 break; 232 - case S_IFCHR: 233 - case S_IFBLK: 234 - case S_IFIFO: 235 - case S_IFSOCK: 232 + default: 236 233 inode->i_op = &erofs_generic_iops; 237 234 init_special_inode(inode, inode->i_mode, inode->i_rdev); 238 235 return 0; 239 - default: 240 - return -EFSCORRUPTED; 241 236 } 242 237 243 238 mapping_set_large_folios(inode->i_mapping);
+19 -28
fs/erofs/internal.h
··· 37 37 38 38 typedef u64 erofs_nid_t; 39 39 typedef u64 erofs_off_t; 40 - /* data type for filesystem-wide blocks number */ 41 - typedef u32 erofs_blk_t; 40 + typedef u64 erofs_blk_t; 42 41 43 42 struct erofs_device_info { 44 43 char *path; ··· 46 47 struct dax_device *dax_dev; 47 48 u64 dax_part_off; 48 49 49 - u32 blocks; 50 - u32 mapped_blkaddr; 50 + erofs_blk_t blocks; 51 + erofs_blk_t uniaddr; 51 52 }; 52 53 53 54 enum { ··· 142 143 unsigned char blkszbits; /* filesystem block size in bit shift */ 143 144 144 145 u32 sb_size; /* total superblock size */ 145 - u32 build_time_nsec; 146 - u64 build_time; 146 + u32 fixed_nsec; 147 + s64 epoch; 147 148 148 149 /* what we really care is nid, rather than ino.. */ 149 150 erofs_nid_t root_nid; ··· 151 152 /* used for statfs, f_files - f_favail */ 152 153 u64 inos; 153 154 154 - u8 uuid[16]; /* 128-bit uuid for volume */ 155 - u8 volume_name[16]; /* volume name */ 156 155 u32 feature_compat; 157 156 u32 feature_incompat; 158 157 ··· 196 199 EROFS_ZIP_CACHE_READAROUND 197 200 }; 198 201 199 - enum erofs_kmap_type { 200 - EROFS_NO_KMAP, /* don't map the buffer */ 201 - EROFS_KMAP, /* use kmap_local_page() to map the buffer */ 202 - }; 203 - 204 202 struct erofs_buf { 205 203 struct address_space *mapping; 206 204 struct file *file; ··· 204 212 }; 205 213 #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) 206 214 207 - #define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) 208 - #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) 215 + #define erofs_blknr(sb, pos) ((erofs_blk_t)((pos) >> (sb)->s_blocksize_bits)) 216 + #define erofs_blkoff(sb, pos) ((pos) & ((sb)->s_blocksize - 1)) 209 217 #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) 210 218 #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) 211 219 ··· 225 233 EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) 226 234 EROFS_FEATURE_FUNCS(dedupe, 
incompat, INCOMPAT_DEDUPE) 227 235 EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) 236 + EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT) 228 237 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) 229 238 EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) 230 239 ··· 245 252 246 253 unsigned char datalayout; 247 254 unsigned char inode_isize; 255 + bool dot_omitted; 248 256 unsigned int xattr_isize; 249 257 250 258 unsigned int xattr_name_filter; ··· 253 259 unsigned int *xattr_shared_xattrs; 254 260 255 261 union { 256 - erofs_blk_t raw_blkaddr; 262 + erofs_blk_t startblk; 257 263 struct { 258 264 unsigned short chunkformat; 259 265 unsigned char chunkbits; ··· 262 268 struct { 263 269 unsigned short z_advise; 264 270 unsigned char z_algorithmtype[2]; 265 - unsigned char z_logical_clusterbits; 266 - unsigned long z_tailextent_headlcn; 271 + unsigned char z_lclusterbits; 267 272 union { 268 - struct { 269 - erofs_off_t z_idataoff; 270 - unsigned short z_idata_size; 271 - }; 272 - erofs_off_t z_fragmentoff; 273 + u64 z_tailextent_headlcn; 274 + u64 z_extents; 273 275 }; 276 + erofs_off_t z_fragmentoff; 277 + unsigned short z_idata_size; 274 278 }; 275 279 #endif /* CONFIG_EROFS_FS_ZIP */ 276 280 }; ··· 379 387 erofs_off_t *offset, int *lengthp); 380 388 void erofs_unmap_metabuf(struct erofs_buf *buf); 381 389 void erofs_put_metabuf(struct erofs_buf *buf); 382 - void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, 383 - enum erofs_kmap_type type); 390 + void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap); 384 391 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb); 385 392 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, 386 - erofs_off_t offset, enum erofs_kmap_type type); 393 + erofs_off_t offset, bool need_kmap); 387 394 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); 388 395 int erofs_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo, 389 396 u64 start, u64 len); ··· 439 448 void erofs_exit_shrinker(void); 440 449 int __init z_erofs_init_subsystem(void); 441 450 void z_erofs_exit_subsystem(void); 451 + int z_erofs_init_super(struct super_block *sb); 442 452 unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, 443 453 unsigned long nr_shrink); 444 454 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, ··· 449 457 int z_erofs_gbuf_growsize(unsigned int nrpages); 450 458 int __init z_erofs_gbuf_init(void); 451 459 void z_erofs_gbuf_exit(void); 452 - int erofs_init_managed_cache(struct super_block *sb); 453 460 int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); 454 461 #else 455 462 static inline void erofs_shrinker_register(struct super_block *sb) {} ··· 457 466 static inline void erofs_exit_shrinker(void) {} 458 467 static inline int z_erofs_init_subsystem(void) { return 0; } 459 468 static inline void z_erofs_exit_subsystem(void) {} 460 - static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; } 469 + static inline int z_erofs_init_super(struct super_block *sb) { return 0; } 461 470 #endif /* !CONFIG_EROFS_FS_ZIP */ 462 471 463 472 #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
+1 -1
fs/erofs/namei.c
··· 100 100 struct erofs_dirent *de; 101 101 102 102 buf.mapping = dir->i_mapping; 103 - de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), EROFS_KMAP); 103 + de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), true); 104 104 if (!IS_ERR(de)) { 105 105 const int nameoff = nameoff_from_disk(de->nameoff, bsz); 106 106 const int ndirents = nameoff / sizeof(*de);
+41 -44
fs/erofs/super.c
··· 94 94 int len, i, cnt; 95 95 96 96 *offset = round_up(*offset, 4); 97 - ptr = erofs_bread(buf, *offset, EROFS_KMAP); 97 + ptr = erofs_bread(buf, *offset, true); 98 98 if (IS_ERR(ptr)) 99 99 return ptr; 100 100 ··· 110 110 for (i = 0; i < len; i += cnt) { 111 111 cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset), 112 112 len - i); 113 - ptr = erofs_bread(buf, *offset, EROFS_KMAP); 113 + ptr = erofs_bread(buf, *offset, true); 114 114 if (IS_ERR(ptr)) { 115 115 kfree(buffer); 116 116 return ptr; ··· 141 141 struct erofs_deviceslot *dis; 142 142 struct file *file; 143 143 144 - dis = erofs_read_metabuf(buf, sb, *pos, EROFS_KMAP); 144 + dis = erofs_read_metabuf(buf, sb, *pos, true); 145 145 if (IS_ERR(dis)) 146 146 return PTR_ERR(dis); 147 147 ··· 178 178 dif->file = file; 179 179 } 180 180 181 - dif->blocks = le32_to_cpu(dis->blocks); 182 - dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); 181 + dif->blocks = le32_to_cpu(dis->blocks_lo); 182 + dif->uniaddr = le32_to_cpu(dis->uniaddr_lo); 183 183 sbi->total_blocks += dif->blocks; 184 184 *pos += EROFS_DEVT_SLOT_SIZE; 185 185 return 0; ··· 255 255 void *data; 256 256 int ret; 257 257 258 - data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); 258 + data = erofs_read_metabuf(&buf, sb, 0, true); 259 259 if (IS_ERR(data)) { 260 260 erofs_err(sb, "cannot read erofs superblock"); 261 261 return PTR_ERR(data); ··· 268 268 goto out; 269 269 } 270 270 271 - sbi->blkszbits = dsb->blkszbits; 271 + sbi->blkszbits = dsb->blkszbits; 272 272 if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) { 273 273 erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits); 274 274 goto out; ··· 299 299 sbi->sb_size); 300 300 goto out; 301 301 } 302 - sbi->dif0.blocks = le32_to_cpu(dsb->blocks); 302 + sbi->dif0.blocks = le32_to_cpu(dsb->blocks_lo); 303 303 sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); 304 304 #ifdef CONFIG_EROFS_FS_XATTR 305 305 sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); ··· 308 308 
sbi->xattr_filter_reserved = dsb->xattr_filter_reserved; 309 309 #endif 310 310 sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); 311 - sbi->root_nid = le16_to_cpu(dsb->root_nid); 311 + if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { 312 + sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); 313 + sbi->dif0.blocks = (sbi->dif0.blocks << 32) | 314 + le16_to_cpu(dsb->rb.blocks_hi); 315 + } else { 316 + sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); 317 + } 312 318 sbi->packed_nid = le64_to_cpu(dsb->packed_nid); 313 319 sbi->inos = le64_to_cpu(dsb->inos); 314 320 315 - sbi->build_time = le64_to_cpu(dsb->build_time); 316 - sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); 317 - 321 + sbi->epoch = (s64)le64_to_cpu(dsb->epoch); 322 + sbi->fixed_nsec = le32_to_cpu(dsb->fixed_nsec); 318 323 super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid)); 319 - 320 - ret = strscpy(sbi->volume_name, dsb->volume_name, 321 - sizeof(dsb->volume_name)); 322 - if (ret < 0) { /* -E2BIG */ 323 - erofs_err(sb, "bad volume name without NIL terminator"); 324 - ret = -EFSCORRUPTED; 325 - goto out; 326 - } 327 324 328 325 /* parse on-disk compression configurations */ 329 326 ret = z_erofs_parse_cfgs(sb, dsb); ··· 330 333 /* handle multiple devices */ 331 334 ret = erofs_scan_devices(sb, dsb); 332 335 336 + if (erofs_sb_has_48bit(sbi)) 337 + erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!"); 333 338 if (erofs_is_fscache_mode(sb)) 334 339 erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. 
Use at your own risk!"); 335 340 out: ··· 638 639 else 639 640 sb->s_flags &= ~SB_POSIXACL; 640 641 641 - #ifdef CONFIG_EROFS_FS_ZIP 642 - xa_init(&sbi->managed_pslots); 643 - #endif 642 + err = z_erofs_init_super(sb); 643 + if (err) 644 + return err; 645 + 646 + if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) { 647 + inode = erofs_iget(sb, sbi->packed_nid); 648 + if (IS_ERR(inode)) 649 + return PTR_ERR(inode); 650 + sbi->packed_inode = inode; 651 + } 644 652 645 653 inode = erofs_iget(sb, sbi->root_nid); 646 654 if (IS_ERR(inode)) ··· 659 653 iput(inode); 660 654 return -EINVAL; 661 655 } 662 - 663 656 sb->s_root = d_make_root(inode); 664 657 if (!sb->s_root) 665 658 return -ENOMEM; 666 659 667 660 erofs_shrinker_register(sb); 668 - if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) { 669 - sbi->packed_inode = erofs_iget(sb, sbi->packed_nid); 670 - if (IS_ERR(sbi->packed_inode)) { 671 - err = PTR_ERR(sbi->packed_inode); 672 - sbi->packed_inode = NULL; 673 - return err; 674 - } 675 - } 676 - err = erofs_init_managed_cache(sb); 677 - if (err) 678 - return err; 679 - 680 661 err = erofs_xattr_prefixes_init(sb); 681 662 if (err) 682 663 return err; ··· 799 806 return 0; 800 807 } 801 808 809 + static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi) 810 + { 811 + iput(sbi->packed_inode); 812 + sbi->packed_inode = NULL; 813 + #ifdef CONFIG_EROFS_FS_ZIP 814 + iput(sbi->managed_cache); 815 + sbi->managed_cache = NULL; 816 + #endif 817 + } 818 + 802 819 static void erofs_kill_sb(struct super_block *sb) 803 820 { 804 821 struct erofs_sb_info *sbi = EROFS_SB(sb); ··· 818 815 kill_anon_super(sb); 819 816 else 820 817 kill_block_super(sb); 818 + erofs_drop_internal_inodes(sbi); 821 819 fs_put_dax(sbi->dif0.dax_dev, NULL); 822 820 erofs_fscache_unregister_fs(sb); 823 821 erofs_sb_free(sbi); ··· 829 825 { 830 826 struct erofs_sb_info *const sbi = EROFS_SB(sb); 831 827 832 - DBG_BUGON(!sbi); 833 - 834 828 erofs_unregister_sysfs(sb); 835 829 
erofs_shrinker_unregister(sb); 836 830 erofs_xattr_prefixes_cleanup(sb); 837 - #ifdef CONFIG_EROFS_FS_ZIP 838 - iput(sbi->managed_cache); 839 - sbi->managed_cache = NULL; 840 - #endif 841 - iput(sbi->packed_inode); 842 - sbi->packed_inode = NULL; 831 + erofs_drop_internal_inodes(sbi); 843 832 erofs_free_dev_context(sbi->devs); 844 833 sbi->devs = NULL; 845 834 erofs_fscache_unregister_fs(sb);
+2
fs/erofs/sysfs.c
··· 81 81 EROFS_ATTR_FEATURE(ztailpacking); 82 82 EROFS_ATTR_FEATURE(fragments); 83 83 EROFS_ATTR_FEATURE(dedupe); 84 + EROFS_ATTR_FEATURE(48bit); 84 85 85 86 static struct attribute *erofs_feat_attrs[] = { 86 87 ATTR_LIST(zero_padding), ··· 94 93 ATTR_LIST(ztailpacking), 95 94 ATTR_LIST(fragments), 96 95 ATTR_LIST(dedupe), 96 + ATTR_LIST(48bit), 97 97 NULL, 98 98 }; 99 99 ATTRIBUTE_GROUPS(erofs_feat);
+6 -6
fs/erofs/xattr.c
··· 81 81 it.pos = erofs_iloc(inode) + vi->inode_isize; 82 82 83 83 /* read in shared xattr array (non-atomic, see kmalloc below) */ 84 - it.kaddr = erofs_bread(&it.buf, it.pos, EROFS_KMAP); 84 + it.kaddr = erofs_bread(&it.buf, it.pos, true); 85 85 if (IS_ERR(it.kaddr)) { 86 86 ret = PTR_ERR(it.kaddr); 87 87 goto out_unlock; ··· 102 102 it.pos += sizeof(struct erofs_xattr_ibody_header); 103 103 104 104 for (i = 0; i < vi->xattr_shared_count; ++i) { 105 - it.kaddr = erofs_bread(&it.buf, it.pos, EROFS_KMAP); 105 + it.kaddr = erofs_bread(&it.buf, it.pos, true); 106 106 if (IS_ERR(it.kaddr)) { 107 107 kfree(vi->xattr_shared_xattrs); 108 108 vi->xattr_shared_xattrs = NULL; ··· 183 183 void *src; 184 184 185 185 for (processed = 0; processed < len; processed += slice) { 186 - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); 186 + it->kaddr = erofs_bread(&it->buf, it->pos, true); 187 187 if (IS_ERR(it->kaddr)) 188 188 return PTR_ERR(it->kaddr); 189 189 ··· 286 286 287 287 /* 2. handle xattr name */ 288 288 for (processed = 0; processed < entry.e_name_len; processed += slice) { 289 - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); 289 + it->kaddr = erofs_bread(&it->buf, it->pos, true); 290 290 if (IS_ERR(it->kaddr)) 291 291 return PTR_ERR(it->kaddr); 292 292 ··· 330 330 it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz; 331 331 332 332 while (remaining) { 333 - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); 333 + it->kaddr = erofs_bread(&it->buf, it->pos, true); 334 334 if (IS_ERR(it->kaddr)) 335 335 return PTR_ERR(it->kaddr); 336 336 ··· 367 367 for (i = 0; i < vi->xattr_shared_count; ++i) { 368 368 it->pos = erofs_pos(sb, sbi->xattr_blkaddr) + 369 369 vi->xattr_shared_xattrs[i] * sizeof(__le32); 370 - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); 370 + it->kaddr = erofs_bread(&it->buf, it->pos, true); 371 371 if (IS_ERR(it->kaddr)) 372 372 return PTR_ERR(it->kaddr); 373 373
+51 -51
fs/erofs/zdata.c
··· 44 44 /* A: point to next chained pcluster or TAILs */ 45 45 struct z_erofs_pcluster *next; 46 46 47 - /* I: start block address of this pcluster */ 48 - erofs_off_t index; 47 + /* I: start physical position of this pcluster */ 48 + erofs_off_t pos; 49 49 50 50 /* L: the maximum decompression size of this round */ 51 51 unsigned int length; ··· 72 72 73 73 /* I: compression algorithm format */ 74 74 unsigned char algorithmformat; 75 + 76 + /* I: whether compressed data is in-lined or not */ 77 + bool from_meta; 75 78 76 79 /* L: whether partial decompression or not */ 77 80 bool partial; ··· 105 102 bool eio, sync; 106 103 }; 107 104 108 - static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl) 109 - { 110 - return !pcl->index; 111 - } 112 - 113 105 static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) 114 106 { 115 - return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; 107 + return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT; 116 108 } 117 109 118 110 static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo) ··· 131 133 132 134 static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = { 133 135 _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128), 134 - _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES) 136 + _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1) 135 137 }; 136 138 137 139 struct z_erofs_bvec_iter { ··· 265 267 pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL); 266 268 if (!pcl) 267 269 return ERR_PTR(-ENOMEM); 268 - pcl->pclustersize = size; 269 270 return pcl; 270 271 } 271 272 return ERR_PTR(-EINVAL); ··· 513 516 struct z_erofs_pcluster *pcl = fe->pcl; 514 517 unsigned int pclusterpages = z_erofs_pclusterpages(pcl); 515 518 bool shouldalloc = z_erofs_should_alloc_cache(fe); 519 + pgoff_t poff = pcl->pos >> PAGE_SHIFT; 516 520 bool may_bypass = true; 517 521 /* Optimistic allocation, as in-place I/O can be used as a fallback */ 518 522 gfp_t gfp = (mapping_gfp_mask(mc) & 
~__GFP_DIRECT_RECLAIM) | ··· 530 532 if (READ_ONCE(pcl->compressed_bvecs[i].page)) 531 533 continue; 532 534 533 - folio = filemap_get_folio(mc, pcl->index + i); 535 + folio = filemap_get_folio(mc, poff + i); 534 536 if (IS_ERR(folio)) { 535 537 may_bypass = false; 536 538 if (!shouldalloc) ··· 573 575 struct folio *folio; 574 576 int i; 575 577 576 - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); 578 + DBG_BUGON(pcl->from_meta); 577 579 /* Each cached folio contains one page unless bs > ps is supported */ 578 580 for (i = 0; i < pclusterpages; ++i) { 579 581 if (pcl->compressed_bvecs[i].page) { ··· 605 607 ret = false; 606 608 spin_lock(&pcl->lockref.lock); 607 609 if (pcl->lockref.count <= 0) { 608 - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); 610 + DBG_BUGON(pcl->from_meta); 609 611 for (; bvec < end; ++bvec) { 610 612 if (bvec->page && page_folio(bvec->page) == folio) { 611 613 bvec->page = NULL; ··· 642 644 .invalidate_folio = z_erofs_cache_invalidate_folio, 643 645 }; 644 646 645 - int erofs_init_managed_cache(struct super_block *sb) 647 + int z_erofs_init_super(struct super_block *sb) 646 648 { 647 649 struct inode *const inode = new_inode(sb); 648 650 649 651 if (!inode) 650 652 return -ENOMEM; 651 - 652 653 set_nlink(inode, 1); 653 654 inode->i_size = OFFSET_MAX; 654 655 inode->i_mapping->a_ops = &z_erofs_cache_aops; 655 656 mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); 656 657 EROFS_SB(sb)->managed_cache = inode; 658 + xa_init(&EROFS_SB(sb)->managed_pslots); 657 659 return 0; 658 660 } 659 661 ··· 665 667 int ret; 666 668 667 669 if (exclusive) { 668 - /* give priority for inplaceio to use file pages first */ 669 - spin_lock(&pcl->lockref.lock); 670 - while (fe->icur > 0) { 671 - if (pcl->compressed_bvecs[--fe->icur].page) 672 - continue; 673 - pcl->compressed_bvecs[fe->icur] = *bvec; 670 + /* Inplace I/O is limited to one page for uncompressed data */ 671 + if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX || 672 + fe->icur <= 1) { 673 + /* Try to 
prioritize inplace I/O here */ 674 + spin_lock(&pcl->lockref.lock); 675 + while (fe->icur > 0) { 676 + if (pcl->compressed_bvecs[--fe->icur].page) 677 + continue; 678 + pcl->compressed_bvecs[fe->icur] = *bvec; 679 + spin_unlock(&pcl->lockref.lock); 680 + return 0; 681 + } 674 682 spin_unlock(&pcl->lockref.lock); 675 - return 0; 676 683 } 677 - spin_unlock(&pcl->lockref.lock); 678 684 679 685 /* otherwise, check if it can be used as a bvpage */ 680 686 if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && ··· 713 711 struct erofs_map_blocks *map = &fe->map; 714 712 struct super_block *sb = fe->inode->i_sb; 715 713 struct erofs_sb_info *sbi = EROFS_SB(sb); 716 - bool ztailpacking = map->m_flags & EROFS_MAP_META; 717 714 struct z_erofs_pcluster *pcl, *pre; 715 + unsigned int pageofs_in; 718 716 int err; 719 717 720 - if (!(map->m_flags & EROFS_MAP_ENCODED) || 721 - (!ztailpacking && !erofs_blknr(sb, map->m_pa))) { 722 - DBG_BUGON(1); 723 - return -EFSCORRUPTED; 724 - } 725 - 726 - /* no available pcluster, let's allocate one */ 727 - pcl = z_erofs_alloc_pcluster(map->m_plen); 718 + pageofs_in = erofs_blkoff(sb, map->m_pa); 719 + pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen); 728 720 if (IS_ERR(pcl)) 729 721 return PTR_ERR(pcl); 730 722 731 723 lockref_init(&pcl->lockref); /* one ref for this request */ 732 724 pcl->algorithmformat = map->m_algorithmformat; 725 + pcl->pclustersize = map->m_plen; 726 + pcl->pageofs_in = pageofs_in; 733 727 pcl->length = 0; 734 728 pcl->partial = true; 735 729 pcl->next = fe->head; 730 + pcl->pos = map->m_pa; 731 + pcl->pageofs_in = pageofs_in; 736 732 pcl->pageofs_out = map->m_la & ~PAGE_MASK; 733 + pcl->from_meta = map->m_flags & EROFS_MAP_META; 737 734 fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; 738 735 739 736 /* ··· 742 741 mutex_init(&pcl->lock); 743 742 DBG_BUGON(!mutex_trylock(&pcl->lock)); 744 743 745 - if (ztailpacking) { 746 - pcl->index = 0; /* which indicates ztailpacking */ 747 - } else { 748 - pcl->index = erofs_blknr(sb, 
map->m_pa); 744 + if (!pcl->from_meta) { 749 745 while (1) { 750 746 xa_lock(&sbi->managed_pslots); 751 - pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index, 747 + pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos, 752 748 NULL, pcl, GFP_KERNEL); 753 749 if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) { 754 750 xa_unlock(&sbi->managed_pslots); ··· 777 779 { 778 780 struct erofs_map_blocks *map = &fe->map; 779 781 struct super_block *sb = fe->inode->i_sb; 780 - erofs_blk_t blknr = erofs_blknr(sb, map->m_pa); 781 782 struct z_erofs_pcluster *pcl = NULL; 782 783 int ret; 783 784 ··· 787 790 if (!(map->m_flags & EROFS_MAP_META)) { 788 791 while (1) { 789 792 rcu_read_lock(); 790 - pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr); 793 + pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa); 791 794 if (!pcl || z_erofs_get_pcluster(pcl)) { 792 - DBG_BUGON(pcl && blknr != pcl->index); 795 + DBG_BUGON(pcl && map->m_pa != pcl->pos); 793 796 rcu_read_unlock(); 794 797 break; 795 798 } ··· 823 826 824 827 z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset, 825 828 Z_EROFS_INLINE_BVECS, fe->pcl->vcnt); 826 - if (!z_erofs_is_inline_pcluster(fe->pcl)) { 829 + if (!fe->pcl->from_meta) { 827 830 /* bind cache first when cached decompression is preferred */ 828 831 z_erofs_bind_cache(fe); 829 832 } else { 830 833 void *mptr; 831 834 832 - mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, EROFS_NO_KMAP); 835 + mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, false); 833 836 if (IS_ERR(mptr)) { 834 837 ret = PTR_ERR(mptr); 835 838 erofs_err(sb, "failed to get inline data %d", ret); ··· 868 871 * It's impossible to fail after the pcluster is freezed, but in order 869 872 * to avoid some race conditions, add a DBG_BUGON to observe this. 
870 873 */ 871 - DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl); 874 + DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl); 872 875 873 876 lockref_mark_dead(&pcl->lockref); 874 877 return true; ··· 964 967 buf.mapping = packed_inode->i_mapping; 965 968 for (; cur < end; cur += cnt, pos += cnt) { 966 969 cnt = min(end - cur, sb->s_blocksize - erofs_blkoff(sb, pos)); 967 - src = erofs_bread(&buf, pos, EROFS_KMAP); 970 + src = erofs_bread(&buf, pos, true); 968 971 if (IS_ERR(src)) { 969 972 erofs_put_metabuf(&buf); 970 973 return PTR_ERR(src); ··· 1218 1221 } 1219 1222 be->compressed_pages[i] = page; 1220 1223 1221 - if (z_erofs_is_inline_pcluster(pcl) || 1224 + if (pcl->from_meta || 1222 1225 erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) { 1223 1226 if (!PageUptodate(page)) 1224 1227 err = -EIO; ··· 1281 1284 .sb = be->sb, 1282 1285 .in = be->compressed_pages, 1283 1286 .out = be->decompressed_pages, 1287 + .inpages = pclusterpages, 1288 + .outpages = be->nr_pages, 1284 1289 .pageofs_in = pcl->pageofs_in, 1285 1290 .pageofs_out = pcl->pageofs_out, 1286 1291 .inputsize = pcl->pclustersize, ··· 1296 1297 }, be->pagepool); 1297 1298 1298 1299 /* must handle all compressed pages before actual file pages */ 1299 - if (z_erofs_is_inline_pcluster(pcl)) { 1300 + if (pcl->from_meta) { 1300 1301 page = pcl->compressed_bvecs[0].page; 1301 1302 WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL); 1302 1303 put_page(page); ··· 1356 1357 WRITE_ONCE(pcl->next, NULL); 1357 1358 mutex_unlock(&pcl->lock); 1358 1359 1359 - if (z_erofs_is_inline_pcluster(pcl)) 1360 + if (pcl->from_meta) 1360 1361 z_erofs_free_pcluster(pcl); 1361 1362 else 1362 1363 z_erofs_put_pcluster(sbi, pcl, try_free); ··· 1537 1538 folio = page_folio(page); 1538 1539 out_tocache: 1539 1540 if (!tocache || bs != PAGE_SIZE || 1540 - filemap_add_folio(mc, folio, pcl->index + nr, gfp)) { 1541 + filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) { 1541 1542 /* turn into 
a temporary shortlived folio (1 ref) */ 1542 1543 folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; 1543 1544 return; ··· 1654 1655 1655 1656 pcl = next; 1656 1657 next = READ_ONCE(pcl->next); 1657 - if (z_erofs_is_inline_pcluster(pcl)) { 1658 + if (pcl->from_meta) { 1658 1659 z_erofs_move_to_bypass_queue(pcl, next, qtail); 1659 1660 continue; 1660 1661 } 1661 1662 1662 1663 /* no device id here, thus it will always succeed */ 1663 1664 mdev = (struct erofs_map_dev) { 1664 - .m_pa = erofs_pos(sb, pcl->index), 1665 + .m_pa = round_down(pcl->pos, sb->s_blocksize), 1665 1666 }; 1666 1667 (void)erofs_map_dev(sb, &mdev); 1667 1668 1668 1669 cur = mdev.m_pa; 1669 - end = cur + pcl->pclustersize; 1670 + end = round_up(cur + pcl->pageofs_in + pcl->pclustersize, 1671 + sb->s_blocksize); 1670 1672 do { 1671 1673 bvec.bv_page = NULL; 1672 1674 if (bio && (cur != last_pa ||
+192 -98
fs/erofs/zmap.c
··· 25 25 { 26 26 struct inode *const inode = m->inode; 27 27 struct erofs_inode *const vi = EROFS_I(inode); 28 - const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) + 28 + const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) + 29 29 vi->inode_isize + vi->xattr_isize) + 30 30 lcn * sizeof(struct z_erofs_lcluster_index); 31 31 struct z_erofs_lcluster_index *di; 32 32 unsigned int advise; 33 33 34 - di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, EROFS_KMAP); 34 + di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, true); 35 35 if (IS_ERR(di)) 36 36 return PTR_ERR(di); 37 37 m->lcn = lcn; ··· 40 40 advise = le16_to_cpu(di->di_advise); 41 41 m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK; 42 42 if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { 43 - m->clusterofs = 1 << vi->z_logical_clusterbits; 43 + m->clusterofs = 1 << vi->z_lclusterbits; 44 44 m->delta[0] = le16_to_cpu(di->di_u.delta[0]); 45 45 if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) { 46 46 if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | ··· 55 55 } else { 56 56 m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); 57 57 m->clusterofs = le16_to_cpu(di->di_clusterofs); 58 - if (m->clusterofs >= 1 << vi->z_logical_clusterbits) { 58 + if (m->clusterofs >= 1 << vi->z_lclusterbits) { 59 59 DBG_BUGON(1); 60 60 return -EFSCORRUPTED; 61 61 } ··· 102 102 { 103 103 struct inode *const inode = m->inode; 104 104 struct erofs_inode *const vi = EROFS_I(inode); 105 - const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + 106 - ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); 107 - const unsigned int lclusterbits = vi->z_logical_clusterbits; 105 + const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + 106 + vi->inode_isize + vi->xattr_isize); 107 + const unsigned int lclusterbits = vi->z_lclusterbits; 108 108 const unsigned int totalidx = erofs_iblks(inode); 109 109 unsigned int compacted_4b_initial, compacted_2b, amortizedshift; 110 110 unsigned 
int vcnt, lo, lobits, encodebits, nblk, bytes; ··· 146 146 else 147 147 return -EOPNOTSUPP; 148 148 149 - in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, EROFS_KMAP); 149 + in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, true); 150 150 if (IS_ERR(in)) 151 151 return PTR_ERR(in); 152 152 ··· 255 255 { 256 256 struct super_block *sb = m->inode->i_sb; 257 257 struct erofs_inode *const vi = EROFS_I(m->inode); 258 - const unsigned int lclusterbits = vi->z_logical_clusterbits; 258 + const unsigned int lclusterbits = vi->z_lclusterbits; 259 259 260 260 while (m->lcn >= lookback_distance) { 261 261 unsigned long lcn = m->lcn - lookback_distance; ··· 265 265 if (err) 266 266 return err; 267 267 268 - switch (m->type) { 269 - case Z_EROFS_LCLUSTER_TYPE_NONHEAD: 270 - lookback_distance = m->delta[0]; 271 - if (!lookback_distance) 272 - goto err_bogus; 273 - continue; 274 - case Z_EROFS_LCLUSTER_TYPE_PLAIN: 275 - case Z_EROFS_LCLUSTER_TYPE_HEAD1: 276 - case Z_EROFS_LCLUSTER_TYPE_HEAD2: 277 - m->headtype = m->type; 278 - m->map->m_la = (lcn << lclusterbits) | m->clusterofs; 279 - return 0; 280 - default: 268 + if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { 281 269 erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", 282 270 m->type, lcn, vi->nid); 283 271 DBG_BUGON(1); 284 272 return -EOPNOTSUPP; 273 + } else if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { 274 + lookback_distance = m->delta[0]; 275 + if (!lookback_distance) 276 + break; 277 + continue; 278 + } else { 279 + m->headtype = m->type; 280 + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; 281 + return 0; 285 282 } 286 283 } 287 - err_bogus: 288 284 erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", 289 285 lookback_distance, m->lcn, vi->nid); 290 286 DBG_BUGON(1); ··· 304 308 if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) || 305 309 ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || 306 310 m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) || 307 - (lcn << 
vi->z_logical_clusterbits) >= inode->i_size) 311 + (lcn << vi->z_lclusterbits) >= inode->i_size) 308 312 m->compressedblks = 1; 309 313 310 314 if (m->compressedblks) ··· 325 329 DBG_BUGON(lcn == initial_lcn && 326 330 m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); 327 331 328 - switch (m->type) { 329 - case Z_EROFS_LCLUSTER_TYPE_PLAIN: 330 - case Z_EROFS_LCLUSTER_TYPE_HEAD1: 331 - case Z_EROFS_LCLUSTER_TYPE_HEAD2: 332 + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { 333 + if (m->delta[0] != 1) { 334 + erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); 335 + DBG_BUGON(1); 336 + return -EFSCORRUPTED; 337 + } 338 + if (m->compressedblks) 339 + goto out; 340 + } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { 332 341 /* 333 342 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type 334 343 * rather than CBLKCNT, it's a 1 block-sized pcluster. 335 344 */ 336 345 m->compressedblks = 1; 337 - break; 338 - case Z_EROFS_LCLUSTER_TYPE_NONHEAD: 339 - if (m->delta[0] != 1) 340 - goto err_bonus_cblkcnt; 341 - if (m->compressedblks) 342 - break; 343 - fallthrough; 344 - default: 345 - erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, 346 - vi->nid); 347 - DBG_BUGON(1); 348 - return -EFSCORRUPTED; 346 + goto out; 349 347 } 348 + erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); 349 + DBG_BUGON(1); 350 + return -EFSCORRUPTED; 350 351 out: 351 352 m->map->m_plen = erofs_pos(sb, m->compressedblks); 352 353 return 0; 353 - err_bonus_cblkcnt: 354 - erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); 355 - DBG_BUGON(1); 356 - return -EFSCORRUPTED; 357 354 } 358 355 359 356 static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) ··· 354 365 struct inode *inode = m->inode; 355 366 struct erofs_inode *vi = EROFS_I(inode); 356 367 struct erofs_map_blocks *map = m->map; 357 - unsigned int lclusterbits = vi->z_logical_clusterbits; 368 + unsigned int lclusterbits = vi->z_lclusterbits; 358 369 u64 
lcn = m->lcn, headlcn = map->m_la >> lclusterbits; 359 370 int err; 360 371 ··· 375 386 m->delta[1] = 1; 376 387 DBG_BUGON(1); 377 388 } 378 - } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN || 379 - m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 || 380 - m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) { 389 + } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { 381 390 if (lcn != headlcn) 382 391 break; /* ends at the next HEAD lcluster */ 383 392 m->delta[1] = 1; ··· 391 404 return 0; 392 405 } 393 406 394 - static int z_erofs_do_map_blocks(struct inode *inode, 407 + static int z_erofs_map_blocks_fo(struct inode *inode, 395 408 struct erofs_map_blocks *map, int flags) 396 409 { 397 - struct erofs_inode *const vi = EROFS_I(inode); 398 - bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; 410 + struct erofs_inode *vi = EROFS_I(inode); 411 + struct super_block *sb = inode->i_sb; 399 412 bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; 413 + bool ztailpacking = vi->z_idata_size; 414 + unsigned int lclusterbits = vi->z_lclusterbits; 400 415 struct z_erofs_maprecorder m = { 401 416 .inode = inode, 402 417 .map = map, 403 418 }; 404 419 int err = 0; 405 - unsigned int lclusterbits, endoff, afmt; 420 + unsigned int endoff, afmt; 406 421 unsigned long initial_lcn; 407 422 unsigned long long ofs, end; 408 423 409 - lclusterbits = vi->z_logical_clusterbits; 410 424 ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? 
inode->i_size - 1 : map->m_la; 425 + if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) && 426 + !vi->z_tailextent_headlcn) { 427 + map->m_la = 0; 428 + map->m_llen = inode->i_size; 429 + map->m_flags = EROFS_MAP_MAPPED | 430 + EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; 431 + return 0; 432 + } 411 433 initial_lcn = ofs >> lclusterbits; 412 434 endoff = ofs & ((1 << lclusterbits) - 1); 413 435 ··· 424 428 if (err) 425 429 goto unmap_out; 426 430 427 - if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL)) 428 - vi->z_idataoff = m.nextpackoff; 429 - 431 + if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking) 432 + vi->z_fragmentoff = m.nextpackoff; 430 433 map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; 431 434 end = (m.lcn + 1ULL) << lclusterbits; 432 435 ··· 447 452 } 448 453 /* m.lcn should be >= 1 if endoff < m.clusterofs */ 449 454 if (!m.lcn) { 450 - erofs_err(inode->i_sb, 451 - "invalid logical cluster 0 at nid %llu", 455 + erofs_err(sb, "invalid logical cluster 0 at nid %llu", 452 456 vi->nid); 453 457 err = -EFSCORRUPTED; 454 458 goto unmap_out; ··· 463 469 goto unmap_out; 464 470 break; 465 471 default: 466 - erofs_err(inode->i_sb, 467 - "unknown type %u @ offset %llu of nid %llu", 472 + erofs_err(sb, "unknown type %u @ offset %llu of nid %llu", 468 473 m.type, ofs, vi->nid); 469 474 err = -EOPNOTSUPP; 470 475 goto unmap_out; ··· 480 487 } 481 488 if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { 482 489 map->m_flags |= EROFS_MAP_META; 483 - map->m_pa = vi->z_idataoff; 490 + map->m_pa = vi->z_fragmentoff; 484 491 map->m_plen = vi->z_idata_size; 492 + if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { 493 + erofs_err(sb, "invalid tail-packing pclustersize %llu", 494 + map->m_plen); 495 + err = -EFSCORRUPTED; 496 + goto unmap_out; 497 + } 485 498 } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { 486 499 map->m_flags |= EROFS_MAP_FRAGMENT; 487 500 } else { 488 - map->m_pa = erofs_pos(inode->i_sb, m.pblk); 501 + 
map->m_pa = erofs_pos(sb, m.pblk); 489 502 err = z_erofs_get_extent_compressedlen(&m, initial_lcn); 490 503 if (err) 491 504 goto unmap_out; ··· 510 511 afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ? 511 512 vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; 512 513 if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { 513 - erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", 514 + erofs_err(sb, "inconsistent algorithmtype %u for nid %llu", 514 515 afmt, vi->nid); 515 516 err = -EFSCORRUPTED; 516 517 goto unmap_out; ··· 532 533 unmap_out: 533 534 erofs_unmap_metabuf(&m.map->buf); 534 535 return err; 536 + } 537 + 538 + static int z_erofs_map_blocks_ext(struct inode *inode, 539 + struct erofs_map_blocks *map, int flags) 540 + { 541 + struct erofs_inode *vi = EROFS_I(inode); 542 + struct super_block *sb = inode->i_sb; 543 + bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER; 544 + unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); 545 + erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + 546 + vi->inode_isize + vi->xattr_isize), recsz); 547 + erofs_off_t lend = inode->i_size; 548 + erofs_off_t l, r, mid, pa, la, lstart; 549 + struct z_erofs_extent *ext; 550 + unsigned int fmt; 551 + bool last; 552 + 553 + map->m_flags = 0; 554 + if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) { 555 + if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) { 556 + ext = erofs_read_metabuf(&map->buf, sb, pos, true); 557 + if (IS_ERR(ext)) 558 + return PTR_ERR(ext); 559 + pa = le64_to_cpu(*(__le64 *)ext); 560 + pos += sizeof(__le64); 561 + lstart = 0; 562 + } else { 563 + lstart = map->m_la >> vi->z_lclusterbits; 564 + pa = EROFS_NULL_ADDR; 565 + } 566 + 567 + for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) { 568 + ext = erofs_read_metabuf(&map->buf, sb, pos, true); 569 + if (IS_ERR(ext)) 570 + return PTR_ERR(ext); 571 + map->m_plen = le32_to_cpu(ext->plen); 572 + if (pa != EROFS_NULL_ADDR) 
{ 573 + map->m_pa = pa; 574 + pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK; 575 + } else { 576 + map->m_pa = le32_to_cpu(ext->pstart_lo); 577 + } 578 + pos += recsz; 579 + } 580 + last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits)); 581 + lend = min(lstart, lend); 582 + lstart -= 1 << vi->z_lclusterbits; 583 + } else { 584 + lstart = lend; 585 + for (l = 0, r = vi->z_extents; l < r; ) { 586 + mid = l + (r - l) / 2; 587 + ext = erofs_read_metabuf(&map->buf, sb, 588 + pos + mid * recsz, true); 589 + if (IS_ERR(ext)) 590 + return PTR_ERR(ext); 591 + 592 + la = le32_to_cpu(ext->lstart_lo); 593 + pa = le32_to_cpu(ext->pstart_lo) | 594 + (u64)le32_to_cpu(ext->pstart_hi) << 32; 595 + if (recsz > offsetof(struct z_erofs_extent, lstart_hi)) 596 + la |= (u64)le32_to_cpu(ext->lstart_hi) << 32; 597 + 598 + if (la > map->m_la) { 599 + r = mid; 600 + lend = la; 601 + } else { 602 + l = mid + 1; 603 + if (map->m_la == la) 604 + r = min(l + 1, r); 605 + lstart = la; 606 + map->m_plen = le32_to_cpu(ext->plen); 607 + map->m_pa = pa; 608 + } 609 + } 610 + last = (l >= vi->z_extents); 611 + } 612 + 613 + if (lstart < lend) { 614 + map->m_la = lstart; 615 + if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { 616 + map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; 617 + vi->z_fragmentoff = map->m_plen; 618 + if (recsz >= offsetof(struct z_erofs_extent, pstart_lo)) 619 + vi->z_fragmentoff |= map->m_pa << 32; 620 + } else if (map->m_plen) { 621 + map->m_flags |= EROFS_MAP_MAPPED | 622 + EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; 623 + fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; 624 + if (fmt) 625 + map->m_algorithmformat = fmt - 1; 626 + else if (interlaced && !erofs_blkoff(sb, map->m_pa)) 627 + map->m_algorithmformat = 628 + Z_EROFS_COMPRESSION_INTERLACED; 629 + else 630 + map->m_algorithmformat = 631 + Z_EROFS_COMPRESSION_SHIFTED; 632 + if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) 633 + map->m_flags |= EROFS_MAP_PARTIAL_REF; 634 + map->m_plen &= 
Z_EROFS_EXTENT_PLEN_MASK; 635 + } 636 + } 637 + map->m_llen = lend - map->m_la; 638 + if (!last && map->m_llen < sb->s_blocksize) { 639 + erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", 640 + map->m_llen, map->m_la, vi->nid); 641 + DBG_BUGON(1); 642 + return -EFSCORRUPTED; 643 + } 644 + return 0; 535 645 } 536 646 537 647 static int z_erofs_fill_inode_lazy(struct inode *inode) ··· 669 561 goto out_unlock; 670 562 671 563 pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); 672 - h = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP); 564 + h = erofs_read_metabuf(&buf, sb, pos, true); 673 565 if (IS_ERR(h)) { 674 566 err = PTR_ERR(h); 675 567 goto out_unlock; ··· 686 578 goto done; 687 579 } 688 580 vi->z_advise = le16_to_cpu(h->h_advise); 581 + vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15); 582 + if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && 583 + (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) { 584 + vi->z_extents = le32_to_cpu(h->h_extents_lo) | 585 + ((u64)le16_to_cpu(h->h_extents_hi) << 32); 586 + goto done; 587 + } 588 + 689 589 vi->z_algorithmtype[0] = h->h_algorithmtype & 15; 690 590 vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; 591 + if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) 592 + vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); 593 + else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) 594 + vi->z_idata_size = le16_to_cpu(h->h_idata_size); 691 595 692 596 headnr = 0; 693 597 if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX || ··· 710 590 goto out_put_metabuf; 711 591 } 712 592 713 - vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7); 714 593 if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && 715 594 vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | 716 595 Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { ··· 727 608 goto out_put_metabuf; 728 609 } 729 610 730 - if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { 611 + if (vi->z_idata_size || 612 + (vi->z_advise & 
Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { 731 613 struct erofs_map_blocks map = { 732 614 .buf = __EROFS_BUF_INITIALIZER 733 615 }; 734 616 735 - vi->z_idata_size = le16_to_cpu(h->h_idata_size); 736 - err = z_erofs_do_map_blocks(inode, &map, 737 - EROFS_GET_BLOCKS_FINDTAIL); 738 - erofs_put_metabuf(&map.buf); 739 - 740 - if (!map.m_plen || 741 - erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) { 742 - erofs_err(sb, "invalid tail-packing pclustersize %llu", 743 - map.m_plen); 744 - err = -EFSCORRUPTED; 745 - } 746 - if (err < 0) 747 - goto out_put_metabuf; 748 - } 749 - 750 - if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && 751 - !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) { 752 - struct erofs_map_blocks map = { 753 - .buf = __EROFS_BUF_INITIALIZER 754 - }; 755 - 756 - vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); 757 - err = z_erofs_do_map_blocks(inode, &map, 617 + err = z_erofs_map_blocks_fo(inode, &map, 758 618 EROFS_GET_BLOCKS_FINDTAIL); 759 619 erofs_put_metabuf(&map.buf); 760 620 if (err < 0) ··· 764 666 } else { 765 667 err = z_erofs_fill_inode_lazy(inode); 766 668 if (!err) { 767 - if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && 768 - !vi->z_tailextent_headlcn) { 769 - map->m_la = 0; 770 - map->m_llen = inode->i_size; 771 - map->m_flags = EROFS_MAP_MAPPED | 772 - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; 773 - } else { 774 - err = z_erofs_do_map_blocks(inode, map, flags); 775 - } 669 + if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && 670 + (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) 671 + err = z_erofs_map_blocks_ext(inode, map, flags); 672 + else 673 + err = z_erofs_map_blocks_fo(inode, map, flags); 776 674 } 777 675 if (!err && (map->m_flags & EROFS_MAP_ENCODED) && 778 676 unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
+1 -1
include/trace/events/erofs.h
··· 75 75 __entry->ofs = erofs_blkoff(inode->i_sb, erofs_iloc(inode)); 76 76 ), 77 77 78 - TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u", 78 + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %llu ofs %u", 79 79 show_dev_nid(__entry), 80 80 __entry->blkaddr, __entry->ofs) 81 81 );