btrfs: Add zstd support

Add zstd compression and decompression support to BtrFS. zstd at its
fastest level compresses almost as well as zlib, while offering much
faster compression and decompression, approaching lzo speeds.

I benchmarked btrfs with zstd compression against no compression, lzo
compression, and zlib compression. I benchmarked two scenarios: (1) copying
a set of files to btrfs and then reading the files, and (2) copying a
tarball to btrfs, extracting it to btrfs, and then reading the extracted
files.
After every operation, I call `sync` and include the sync time.
Between every pair of operations I unmount and remount the filesystem
to avoid caching. The benchmark files can be found in the upstream
zstd source repository under
`contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}`
[1] [2].

I ran the benchmarks on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor,
16 GB of RAM, and an SSD.

The first compression benchmark is copying 10 copies of the unzipped
Silesia corpus [3] into a BtrFS filesystem mounted with
`-o compress-force=Method`. The decompression benchmark times how long
it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is
measured by comparing the output of `df` and `du`. See the benchmark file
[1] for details. I benchmarked multiple zstd compression levels, although
the patch uses zstd level 3 (`ZSTD_BTRFS_DEFAULT_LEVEL` in fs/btrfs/zstd.c).

| Method | Ratio | Compression MB/s | Decompression speed |
|---------|-------|------------------|---------------------|
| None | 0.99 | 504 | 686 |
| lzo | 1.66 | 398 | 442 |
| zlib | 2.58 | 65 | 241 |
| zstd 1 | 2.57 | 260 | 383 |
| zstd 3 | 2.71 | 174 | 408 |
| zstd 6 | 2.87 | 70 | 398 |
| zstd 9 | 2.92 | 43 | 406 |
| zstd 12 | 2.93 | 21 | 408 |
| zstd 15 | 3.01 | 11 | 354 |

The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it
measures the compression ratio, extracts the tar, and deletes the tar.
Then it measures the compression ratio again, and `tar`s the extracted
files into `/dev/null`. See the benchmark file [2] for details.

| Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s) | Read (s) |
|--------|-----------|---------------|----------|------------|----------|
| None | 0.97 | 0.78 | 0.981 | 5.501 | 8.807 |
| lzo | 2.06 | 1.38 | 1.631 | 8.458 | 8.585 |
| zlib | 3.40 | 1.86 | 7.750 | 21.544 | 11.744 |
| zstd 1 | 3.57 | 1.85 | 2.579 | 11.479 | 9.389 |

[1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh
[2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh
[3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
[4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz

zstd source repository: https://github.com/facebook/zstd

Signed-off-by: Nick Terrell <terrelln@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>

authored by

Nick Terrell and committed by
Chris Mason
5c1aab1d 73f3d1b4

+468 -12
+2
fs/btrfs/Kconfig
··· 6 select ZLIB_DEFLATE 7 select LZO_COMPRESS 8 select LZO_DECOMPRESS 9 select RAID6_PQ 10 select XOR_BLOCKS 11 select SRCU
··· 6 select ZLIB_DEFLATE 7 select LZO_COMPRESS 8 select LZO_DECOMPRESS 9 + select ZSTD_COMPRESS 10 + select ZSTD_DECOMPRESS 11 select RAID6_PQ 12 select XOR_BLOCKS 13 select SRCU
+1 -1
fs/btrfs/Makefile
··· 6 transaction.o inode.o file.o tree-defrag.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 9 - export.o tree-log.o free-space-cache.o zlib.o lzo.o \ 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ 11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ 12 uuid-tree.o props.o hash.o free-space-tree.o
··· 6 transaction.o inode.o file.o tree-defrag.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 9 + export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ 11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ 12 uuid-tree.o props.o hash.o free-space-tree.o
+1
fs/btrfs/compression.c
··· 704 static const struct btrfs_compress_op * const btrfs_compress_op[] = { 705 &btrfs_zlib_compress, 706 &btrfs_lzo_compress, 707 }; 708 709 void __init btrfs_init_compress(void)
··· 704 static const struct btrfs_compress_op * const btrfs_compress_op[] = { 705 &btrfs_zlib_compress, 706 &btrfs_lzo_compress, 707 + &btrfs_zstd_compress, 708 }; 709 710 void __init btrfs_init_compress(void)
+4 -2
fs/btrfs/compression.h
··· 99 BTRFS_COMPRESS_NONE = 0, 100 BTRFS_COMPRESS_ZLIB = 1, 101 BTRFS_COMPRESS_LZO = 2, 102 - BTRFS_COMPRESS_TYPES = 2, 103 - BTRFS_COMPRESS_LAST = 3, 104 }; 105 106 struct btrfs_compress_op { ··· 129 130 extern const struct btrfs_compress_op btrfs_zlib_compress; 131 extern const struct btrfs_compress_op btrfs_lzo_compress; 132 133 #endif
··· 99 BTRFS_COMPRESS_NONE = 0, 100 BTRFS_COMPRESS_ZLIB = 1, 101 BTRFS_COMPRESS_LZO = 2, 102 + BTRFS_COMPRESS_ZSTD = 3, 103 + BTRFS_COMPRESS_TYPES = 3, 104 + BTRFS_COMPRESS_LAST = 4, 105 }; 106 107 struct btrfs_compress_op { ··· 128 129 extern const struct btrfs_compress_op btrfs_zlib_compress; 130 extern const struct btrfs_compress_op btrfs_lzo_compress; 131 + extern const struct btrfs_compress_op btrfs_zstd_compress; 132 133 #endif
+1
fs/btrfs/ctree.h
··· 270 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 271 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ 272 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 273 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 274 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ 275 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
··· 270 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 271 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ 272 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 273 + BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ 274 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 275 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ 276 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
+2
fs/btrfs/disk-io.c
··· 2828 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2829 if (fs_info->compress_type == BTRFS_COMPRESS_LZO) 2830 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2831 2832 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 2833 btrfs_info(fs_info, "has skinny extents");
··· 2828 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2829 if (fs_info->compress_type == BTRFS_COMPRESS_LZO) 2830 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2831 + else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) 2832 + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; 2833 2834 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 2835 btrfs_info(fs_info, "has skinny extents");
+5 -1
fs/btrfs/ioctl.c
··· 327 328 if (fs_info->compress_type == BTRFS_COMPRESS_LZO) 329 comp = "lzo"; 330 - else 331 comp = "zlib"; 332 ret = btrfs_set_prop(inode, "btrfs.compression", 333 comp, strlen(comp), 0); 334 if (ret) ··· 1468 1469 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1470 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); 1471 } 1472 1473 ret = defrag_count;
··· 327 328 if (fs_info->compress_type == BTRFS_COMPRESS_LZO) 329 comp = "lzo"; 330 + else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) 331 comp = "zlib"; 332 + else 333 + comp = "zstd"; 334 ret = btrfs_set_prop(inode, "btrfs.compression", 335 comp, strlen(comp), 0); 336 if (ret) ··· 1466 1467 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1468 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); 1469 + } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { 1470 + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); 1471 } 1472 1473 ret = defrag_count;
+6
fs/btrfs/props.c
··· 390 return 0; 391 else if (!strncmp("zlib", value, len)) 392 return 0; 393 394 return -EINVAL; 395 } ··· 414 type = BTRFS_COMPRESS_LZO; 415 else if (!strncmp("zlib", value, len)) 416 type = BTRFS_COMPRESS_ZLIB; 417 else 418 return -EINVAL; 419 ··· 433 return "zlib"; 434 case BTRFS_COMPRESS_LZO: 435 return "lzo"; 436 } 437 438 return NULL;
··· 390 return 0; 391 else if (!strncmp("zlib", value, len)) 392 return 0; 393 + else if (!strncmp("zstd", value, len)) 394 + return 0; 395 396 return -EINVAL; 397 } ··· 412 type = BTRFS_COMPRESS_LZO; 413 else if (!strncmp("zlib", value, len)) 414 type = BTRFS_COMPRESS_ZLIB; 415 + else if (!strncmp("zstd", value, len)) 416 + type = BTRFS_COMPRESS_ZSTD; 417 else 418 return -EINVAL; 419 ··· 429 return "zlib"; 430 case BTRFS_COMPRESS_LZO: 431 return "lzo"; 432 + case BTRFS_COMPRESS_ZSTD: 433 + return "zstd"; 434 } 435 436 return NULL;
+11 -1
fs/btrfs/super.c
··· 513 btrfs_clear_opt(info->mount_opt, NODATASUM); 514 btrfs_set_fs_incompat(info, COMPRESS_LZO); 515 no_compress = 0; 516 } else if (strncmp(args[0].from, "no", 2) == 0) { 517 compress_type = "no"; 518 btrfs_clear_opt(info->mount_opt, COMPRESS); ··· 1235 if (btrfs_test_opt(info, COMPRESS)) { 1236 if (info->compress_type == BTRFS_COMPRESS_ZLIB) 1237 compress_type = "zlib"; 1238 - else 1239 compress_type = "lzo"; 1240 if (btrfs_test_opt(info, FORCE_COMPRESS)) 1241 seq_printf(seq, ",compress-force=%s", compress_type); 1242 else
··· 513 btrfs_clear_opt(info->mount_opt, NODATASUM); 514 btrfs_set_fs_incompat(info, COMPRESS_LZO); 515 no_compress = 0; 516 + } else if (strcmp(args[0].from, "zstd") == 0) { 517 + compress_type = "zstd"; 518 + info->compress_type = BTRFS_COMPRESS_ZSTD; 519 + btrfs_set_opt(info->mount_opt, COMPRESS); 520 + btrfs_clear_opt(info->mount_opt, NODATACOW); 521 + btrfs_clear_opt(info->mount_opt, NODATASUM); 522 + btrfs_set_fs_incompat(info, COMPRESS_ZSTD); 523 + no_compress = 0; 524 } else if (strncmp(args[0].from, "no", 2) == 0) { 525 compress_type = "no"; 526 btrfs_clear_opt(info->mount_opt, COMPRESS); ··· 1227 if (btrfs_test_opt(info, COMPRESS)) { 1228 if (info->compress_type == BTRFS_COMPRESS_ZLIB) 1229 compress_type = "zlib"; 1230 + else if (info->compress_type == BTRFS_COMPRESS_LZO) 1231 compress_type = "lzo"; 1232 + else 1233 + compress_type = "zstd"; 1234 if (btrfs_test_opt(info, FORCE_COMPRESS)) 1235 seq_printf(seq, ",compress-force=%s", compress_type); 1236 else
+2
fs/btrfs/sysfs.c
··· 200 BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); 201 BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); 202 BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); 203 BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); 204 BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); 205 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); ··· 213 BTRFS_FEAT_ATTR_PTR(default_subvol), 214 BTRFS_FEAT_ATTR_PTR(mixed_groups), 215 BTRFS_FEAT_ATTR_PTR(compress_lzo), 216 BTRFS_FEAT_ATTR_PTR(big_metadata), 217 BTRFS_FEAT_ATTR_PTR(extended_iref), 218 BTRFS_FEAT_ATTR_PTR(raid56),
··· 200 BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); 201 BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); 202 BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); 203 + BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD); 204 BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); 205 BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); 206 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); ··· 212 BTRFS_FEAT_ATTR_PTR(default_subvol), 213 BTRFS_FEAT_ATTR_PTR(mixed_groups), 214 BTRFS_FEAT_ATTR_PTR(compress_lzo), 215 + BTRFS_FEAT_ATTR_PTR(compress_zstd), 216 BTRFS_FEAT_ATTR_PTR(big_metadata), 217 BTRFS_FEAT_ATTR_PTR(extended_iref), 218 BTRFS_FEAT_ATTR_PTR(raid56),
+432
fs/btrfs/zstd.c
···
··· 1 + /* 2 + * Copyright (c) 2016-present, Facebook, Inc. 3 + * All rights reserved. 4 + * 5 + * This program is free software; you can redistribute it and/or 6 + * modify it under the terms of the GNU General Public 7 + * License v2 as published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 + * General Public License for more details. 13 + */ 14 + #include <linux/bio.h> 15 + #include <linux/err.h> 16 + #include <linux/init.h> 17 + #include <linux/kernel.h> 18 + #include <linux/mm.h> 19 + #include <linux/pagemap.h> 20 + #include <linux/refcount.h> 21 + #include <linux/sched.h> 22 + #include <linux/slab.h> 23 + #include <linux/zstd.h> 24 + #include "compression.h" 25 + 26 + #define ZSTD_BTRFS_MAX_WINDOWLOG 17 27 + #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) 28 + #define ZSTD_BTRFS_DEFAULT_LEVEL 3 29 + 30 + static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) 31 + { 32 + ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL, 33 + src_len, 0); 34 + 35 + if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) 36 + params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; 37 + WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); 38 + return params; 39 + } 40 + 41 + struct workspace { 42 + void *mem; 43 + size_t size; 44 + char *buf; 45 + struct list_head list; 46 + }; 47 + 48 + static void zstd_free_workspace(struct list_head *ws) 49 + { 50 + struct workspace *workspace = list_entry(ws, struct workspace, list); 51 + 52 + kvfree(workspace->mem); 53 + kfree(workspace->buf); 54 + kfree(workspace); 55 + } 56 + 57 + static struct list_head *zstd_alloc_workspace(void) 58 + { 59 + ZSTD_parameters params = 60 + zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); 61 + struct workspace *workspace; 62 + 63 + workspace = kzalloc(sizeof(*workspace), 
GFP_KERNEL); 64 + if (!workspace) 65 + return ERR_PTR(-ENOMEM); 66 + 67 + workspace->size = max_t(size_t, 68 + ZSTD_CStreamWorkspaceBound(params.cParams), 69 + ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); 70 + workspace->mem = kvmalloc(workspace->size, GFP_KERNEL); 71 + workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 72 + if (!workspace->mem || !workspace->buf) 73 + goto fail; 74 + 75 + INIT_LIST_HEAD(&workspace->list); 76 + 77 + return &workspace->list; 78 + fail: 79 + zstd_free_workspace(&workspace->list); 80 + return ERR_PTR(-ENOMEM); 81 + } 82 + 83 + static int zstd_compress_pages(struct list_head *ws, 84 + struct address_space *mapping, 85 + u64 start, 86 + struct page **pages, 87 + unsigned long *out_pages, 88 + unsigned long *total_in, 89 + unsigned long *total_out) 90 + { 91 + struct workspace *workspace = list_entry(ws, struct workspace, list); 92 + ZSTD_CStream *stream; 93 + int ret = 0; 94 + int nr_pages = 0; 95 + struct page *in_page = NULL; /* The current page to read */ 96 + struct page *out_page = NULL; /* The current page to write to */ 97 + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; 98 + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; 99 + unsigned long tot_in = 0; 100 + unsigned long tot_out = 0; 101 + unsigned long len = *total_out; 102 + const unsigned long nr_dest_pages = *out_pages; 103 + unsigned long max_out = nr_dest_pages * PAGE_SIZE; 104 + ZSTD_parameters params = zstd_get_btrfs_parameters(len); 105 + 106 + *out_pages = 0; 107 + *total_out = 0; 108 + *total_in = 0; 109 + 110 + /* Initialize the stream */ 111 + stream = ZSTD_initCStream(params, len, workspace->mem, 112 + workspace->size); 113 + if (!stream) { 114 + pr_warn("BTRFS: ZSTD_initCStream failed\n"); 115 + ret = -EIO; 116 + goto out; 117 + } 118 + 119 + /* map in the first page of input data */ 120 + in_page = find_get_page(mapping, start >> PAGE_SHIFT); 121 + in_buf.src = kmap(in_page); 122 + in_buf.pos = 0; 123 + in_buf.size = min_t(size_t, len, PAGE_SIZE); 124 + 125 + 126 + /* 
Allocate and map in the output buffer */ 127 + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 128 + if (out_page == NULL) { 129 + ret = -ENOMEM; 130 + goto out; 131 + } 132 + pages[nr_pages++] = out_page; 133 + out_buf.dst = kmap(out_page); 134 + out_buf.pos = 0; 135 + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); 136 + 137 + while (1) { 138 + size_t ret2; 139 + 140 + ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); 141 + if (ZSTD_isError(ret2)) { 142 + pr_debug("BTRFS: ZSTD_compressStream returned %d\n", 143 + ZSTD_getErrorCode(ret2)); 144 + ret = -EIO; 145 + goto out; 146 + } 147 + 148 + /* Check to see if we are making it bigger */ 149 + if (tot_in + in_buf.pos > 8192 && 150 + tot_in + in_buf.pos < 151 + tot_out + out_buf.pos) { 152 + ret = -E2BIG; 153 + goto out; 154 + } 155 + 156 + /* We've reached the end of our output range */ 157 + if (out_buf.pos >= max_out) { 158 + tot_out += out_buf.pos; 159 + ret = -E2BIG; 160 + goto out; 161 + } 162 + 163 + /* Check if we need more output space */ 164 + if (out_buf.pos == out_buf.size) { 165 + tot_out += PAGE_SIZE; 166 + max_out -= PAGE_SIZE; 167 + kunmap(out_page); 168 + if (nr_pages == nr_dest_pages) { 169 + out_page = NULL; 170 + ret = -E2BIG; 171 + goto out; 172 + } 173 + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 174 + if (out_page == NULL) { 175 + ret = -ENOMEM; 176 + goto out; 177 + } 178 + pages[nr_pages++] = out_page; 179 + out_buf.dst = kmap(out_page); 180 + out_buf.pos = 0; 181 + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); 182 + } 183 + 184 + /* We've reached the end of the input */ 185 + if (in_buf.pos >= len) { 186 + tot_in += in_buf.pos; 187 + break; 188 + } 189 + 190 + /* Check if we need more input */ 191 + if (in_buf.pos == in_buf.size) { 192 + tot_in += PAGE_SIZE; 193 + kunmap(in_page); 194 + put_page(in_page); 195 + 196 + start += PAGE_SIZE; 197 + len -= PAGE_SIZE; 198 + in_page = find_get_page(mapping, start >> PAGE_SHIFT); 199 + in_buf.src = kmap(in_page); 200 + in_buf.pos = 
0; 201 + in_buf.size = min_t(size_t, len, PAGE_SIZE); 202 + } 203 + } 204 + while (1) { 205 + size_t ret2; 206 + 207 + ret2 = ZSTD_endStream(stream, &out_buf); 208 + if (ZSTD_isError(ret2)) { 209 + pr_debug("BTRFS: ZSTD_endStream returned %d\n", 210 + ZSTD_getErrorCode(ret2)); 211 + ret = -EIO; 212 + goto out; 213 + } 214 + if (ret2 == 0) { 215 + tot_out += out_buf.pos; 216 + break; 217 + } 218 + if (out_buf.pos >= max_out) { 219 + tot_out += out_buf.pos; 220 + ret = -E2BIG; 221 + goto out; 222 + } 223 + 224 + tot_out += PAGE_SIZE; 225 + max_out -= PAGE_SIZE; 226 + kunmap(out_page); 227 + if (nr_pages == nr_dest_pages) { 228 + out_page = NULL; 229 + ret = -E2BIG; 230 + goto out; 231 + } 232 + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 233 + if (out_page == NULL) { 234 + ret = -ENOMEM; 235 + goto out; 236 + } 237 + pages[nr_pages++] = out_page; 238 + out_buf.dst = kmap(out_page); 239 + out_buf.pos = 0; 240 + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); 241 + } 242 + 243 + if (tot_out >= tot_in) { 244 + ret = -E2BIG; 245 + goto out; 246 + } 247 + 248 + ret = 0; 249 + *total_in = tot_in; 250 + *total_out = tot_out; 251 + out: 252 + *out_pages = nr_pages; 253 + /* Cleanup */ 254 + if (in_page) { 255 + kunmap(in_page); 256 + put_page(in_page); 257 + } 258 + if (out_page) 259 + kunmap(out_page); 260 + return ret; 261 + } 262 + 263 + static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) 264 + { 265 + struct workspace *workspace = list_entry(ws, struct workspace, list); 266 + struct page **pages_in = cb->compressed_pages; 267 + u64 disk_start = cb->start; 268 + struct bio *orig_bio = cb->orig_bio; 269 + size_t srclen = cb->compressed_len; 270 + ZSTD_DStream *stream; 271 + int ret = 0; 272 + unsigned long page_in_index = 0; 273 + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); 274 + unsigned long buf_start; 275 + unsigned long total_out = 0; 276 + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; 277 + ZSTD_outBuffer out_buf = { 
NULL, 0, 0 }; 278 + 279 + stream = ZSTD_initDStream( 280 + ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); 281 + if (!stream) { 282 + pr_debug("BTRFS: ZSTD_initDStream failed\n"); 283 + ret = -EIO; 284 + goto done; 285 + } 286 + 287 + in_buf.src = kmap(pages_in[page_in_index]); 288 + in_buf.pos = 0; 289 + in_buf.size = min_t(size_t, srclen, PAGE_SIZE); 290 + 291 + out_buf.dst = workspace->buf; 292 + out_buf.pos = 0; 293 + out_buf.size = PAGE_SIZE; 294 + 295 + while (1) { 296 + size_t ret2; 297 + 298 + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); 299 + if (ZSTD_isError(ret2)) { 300 + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", 301 + ZSTD_getErrorCode(ret2)); 302 + ret = -EIO; 303 + goto done; 304 + } 305 + buf_start = total_out; 306 + total_out += out_buf.pos; 307 + out_buf.pos = 0; 308 + 309 + ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, 310 + total_out, disk_start, orig_bio); 311 + if (ret == 0) 312 + break; 313 + 314 + if (in_buf.pos >= srclen) 315 + break; 316 + 317 + /* Check if we've hit the end of a frame */ 318 + if (ret2 == 0) 319 + break; 320 + 321 + if (in_buf.pos == in_buf.size) { 322 + kunmap(pages_in[page_in_index++]); 323 + if (page_in_index >= total_pages_in) { 324 + in_buf.src = NULL; 325 + ret = -EIO; 326 + goto done; 327 + } 328 + srclen -= PAGE_SIZE; 329 + in_buf.src = kmap(pages_in[page_in_index]); 330 + in_buf.pos = 0; 331 + in_buf.size = min_t(size_t, srclen, PAGE_SIZE); 332 + } 333 + } 334 + ret = 0; 335 + zero_fill_bio(orig_bio); 336 + done: 337 + if (in_buf.src) 338 + kunmap(pages_in[page_in_index]); 339 + return ret; 340 + } 341 + 342 + static int zstd_decompress(struct list_head *ws, unsigned char *data_in, 343 + struct page *dest_page, 344 + unsigned long start_byte, 345 + size_t srclen, size_t destlen) 346 + { 347 + struct workspace *workspace = list_entry(ws, struct workspace, list); 348 + ZSTD_DStream *stream; 349 + int ret = 0; 350 + size_t ret2; 351 + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; 
352 + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; 353 + unsigned long total_out = 0; 354 + unsigned long pg_offset = 0; 355 + char *kaddr; 356 + 357 + stream = ZSTD_initDStream( 358 + ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); 359 + if (!stream) { 360 + pr_warn("BTRFS: ZSTD_initDStream failed\n"); 361 + ret = -EIO; 362 + goto finish; 363 + } 364 + 365 + destlen = min_t(size_t, destlen, PAGE_SIZE); 366 + 367 + in_buf.src = data_in; 368 + in_buf.pos = 0; 369 + in_buf.size = srclen; 370 + 371 + out_buf.dst = workspace->buf; 372 + out_buf.pos = 0; 373 + out_buf.size = PAGE_SIZE; 374 + 375 + ret2 = 1; 376 + while (pg_offset < destlen && in_buf.pos < in_buf.size) { 377 + unsigned long buf_start; 378 + unsigned long buf_offset; 379 + unsigned long bytes; 380 + 381 + /* Check if the frame is over and we still need more input */ 382 + if (ret2 == 0) { 383 + pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); 384 + ret = -EIO; 385 + goto finish; 386 + } 387 + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); 388 + if (ZSTD_isError(ret2)) { 389 + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", 390 + ZSTD_getErrorCode(ret2)); 391 + ret = -EIO; 392 + goto finish; 393 + } 394 + 395 + buf_start = total_out; 396 + total_out += out_buf.pos; 397 + out_buf.pos = 0; 398 + 399 + if (total_out <= start_byte) 400 + continue; 401 + 402 + if (total_out > start_byte && buf_start < start_byte) 403 + buf_offset = start_byte - buf_start; 404 + else 405 + buf_offset = 0; 406 + 407 + bytes = min_t(unsigned long, destlen - pg_offset, 408 + out_buf.size - buf_offset); 409 + 410 + kaddr = kmap_atomic(dest_page); 411 + memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes); 412 + kunmap_atomic(kaddr); 413 + 414 + pg_offset += bytes; 415 + } 416 + ret = 0; 417 + finish: 418 + if (pg_offset < destlen) { 419 + kaddr = kmap_atomic(dest_page); 420 + memset(kaddr + pg_offset, 0, destlen - pg_offset); 421 + kunmap_atomic(kaddr); 422 + } 423 + return ret; 424 + } 425 + 426 + 
const struct btrfs_compress_op btrfs_zstd_compress = { 427 + .alloc_workspace = zstd_alloc_workspace, 428 + .free_workspace = zstd_free_workspace, 429 + .compress_pages = zstd_compress_pages, 430 + .decompress_bio = zstd_decompress_bio, 431 + .decompress = zstd_decompress, 432 + };
+1 -7
include/uapi/linux/btrfs.h
··· 255 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 256 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 257 #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 258 - /* 259 - * some patches floated around with a second compression method 260 - * lets save that incompat here for when they do get in 261 - * Note we don't actually support it, we're just reserving the 262 - * number 263 - */ 264 - #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) 265 266 /* 267 * older kernels tried to do bigger metadata blocks, but the
··· 255 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 256 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 257 #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 258 + #define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) 259 260 /* 261 * older kernels tried to do bigger metadata blocks, but the