Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: Allow adding new compression algorithms

Make the code aware of compression type, instead of always assuming
zlib compression.

Also make the zlib workspace function as common code for all
compression types.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>

Li Zefan 261507a0 4b72029d

+474 -283
+1 -1
fs/btrfs/btrfs_inode.h
··· 157 157 /* 158 158 * always compress this one file 159 159 */ 160 - unsigned force_compress:1; 160 + unsigned force_compress:4; 161 161 162 162 struct inode vfs_inode; 163 163 };
+231 -5
fs/btrfs/compression.c
··· 62 62 /* number of bytes on disk */ 63 63 unsigned long compressed_len; 64 64 65 + /* the compression algorithm for this bio */ 66 + int compress_type; 67 + 65 68 /* number of compressed pages in the array */ 66 69 unsigned long nr_pages; 67 70 ··· 176 173 /* ok, we're the last bio for this extent, lets start 177 174 * the decompression. 178 175 */ 179 - ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 180 - cb->start, 181 - cb->orig_bio->bi_io_vec, 182 - cb->orig_bio->bi_vcnt, 183 - cb->compressed_len); 176 + ret = btrfs_decompress_biovec(cb->compress_type, 177 + cb->compressed_pages, 178 + cb->start, 179 + cb->orig_bio->bi_io_vec, 180 + cb->orig_bio->bi_vcnt, 181 + cb->compressed_len); 184 182 csum_failed: 185 183 if (ret) 186 184 cb->errors = 1; ··· 592 588 593 589 cb->len = uncompressed_len; 594 590 cb->compressed_len = compressed_len; 591 + cb->compress_type = extent_compress_type(bio_flags); 595 592 cb->orig_bio = bio; 596 593 597 594 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / ··· 681 676 682 677 bio_put(comp_bio); 683 678 return 0; 679 + } 680 + 681 + static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; 682 + static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; 683 + static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; 684 + static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; 685 + static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; 686 + 687 + struct btrfs_compress_op *btrfs_compress_op[] = { 688 + &btrfs_zlib_compress, 689 + }; 690 + 691 + int __init btrfs_init_compress(void) 692 + { 693 + int i; 694 + 695 + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { 696 + INIT_LIST_HEAD(&comp_idle_workspace[i]); 697 + spin_lock_init(&comp_workspace_lock[i]); 698 + atomic_set(&comp_alloc_workspace[i], 0); 699 + init_waitqueue_head(&comp_workspace_wait[i]); 700 + } 701 + return 0; 702 + } 703 + 704 + /* 705 + * this finds an available workspace or allocates a new one 706 + * ERR_PTR is returned if things 
go bad. 707 + */ 708 + static struct list_head *find_workspace(int type) 709 + { 710 + struct list_head *workspace; 711 + int cpus = num_online_cpus(); 712 + int idx = type - 1; 713 + 714 + struct list_head *idle_workspace = &comp_idle_workspace[idx]; 715 + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; 716 + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; 717 + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; 718 + int *num_workspace = &comp_num_workspace[idx]; 719 + again: 720 + spin_lock(workspace_lock); 721 + if (!list_empty(idle_workspace)) { 722 + workspace = idle_workspace->next; 723 + list_del(workspace); 724 + (*num_workspace)--; 725 + spin_unlock(workspace_lock); 726 + return workspace; 727 + 728 + } 729 + if (atomic_read(alloc_workspace) > cpus) { 730 + DEFINE_WAIT(wait); 731 + 732 + spin_unlock(workspace_lock); 733 + prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); 734 + if (atomic_read(alloc_workspace) > cpus && !*num_workspace) 735 + schedule(); 736 + finish_wait(workspace_wait, &wait); 737 + goto again; 738 + } 739 + atomic_inc(alloc_workspace); 740 + spin_unlock(workspace_lock); 741 + 742 + workspace = btrfs_compress_op[idx]->alloc_workspace(); 743 + if (IS_ERR(workspace)) { 744 + atomic_dec(alloc_workspace); 745 + wake_up(workspace_wait); 746 + } 747 + return workspace; 748 + } 749 + 750 + /* 751 + * put a workspace struct back on the list or free it if we have enough 752 + * idle ones sitting around 753 + */ 754 + static void free_workspace(int type, struct list_head *workspace) 755 + { 756 + int idx = type - 1; 757 + struct list_head *idle_workspace = &comp_idle_workspace[idx]; 758 + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; 759 + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; 760 + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; 761 + int *num_workspace = &comp_num_workspace[idx]; 762 + 763 + spin_lock(workspace_lock); 764 + if (*num_workspace < num_online_cpus()) 
{ 765 + list_add_tail(workspace, idle_workspace); 766 + (*num_workspace)++; 767 + spin_unlock(workspace_lock); 768 + goto wake; 769 + } 770 + spin_unlock(workspace_lock); 771 + 772 + btrfs_compress_op[idx]->free_workspace(workspace); 773 + atomic_dec(alloc_workspace); 774 + wake: 775 + if (waitqueue_active(workspace_wait)) 776 + wake_up(workspace_wait); 777 + } 778 + 779 + /* 780 + * cleanup function for module exit 781 + */ 782 + static void free_workspaces(void) 783 + { 784 + struct list_head *workspace; 785 + int i; 786 + 787 + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { 788 + while (!list_empty(&comp_idle_workspace[i])) { 789 + workspace = comp_idle_workspace[i].next; 790 + list_del(workspace); 791 + btrfs_compress_op[i]->free_workspace(workspace); 792 + atomic_dec(&comp_alloc_workspace[i]); 793 + } 794 + } 795 + } 796 + 797 + /* 798 + * given an address space and start/len, compress the bytes. 799 + * 800 + * pages are allocated to hold the compressed result and stored 801 + * in 'pages' 802 + * 803 + * out_pages is used to return the number of pages allocated. There 804 + * may be pages allocated even if we return an error 805 + * 806 + * total_in is used to return the number of bytes actually read. It 807 + * may be smaller then len if we had to exit early because we 808 + * ran out of room in the pages array or because we cross the 809 + * max_out threshold. 
810 + * 811 + * total_out is used to return the total number of compressed bytes 812 + * 813 + * max_out tells us the max number of bytes that we're allowed to 814 + * stuff into pages 815 + */ 816 + int btrfs_compress_pages(int type, struct address_space *mapping, 817 + u64 start, unsigned long len, 818 + struct page **pages, 819 + unsigned long nr_dest_pages, 820 + unsigned long *out_pages, 821 + unsigned long *total_in, 822 + unsigned long *total_out, 823 + unsigned long max_out) 824 + { 825 + struct list_head *workspace; 826 + int ret; 827 + 828 + workspace = find_workspace(type); 829 + if (IS_ERR(workspace)) 830 + return -1; 831 + 832 + ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, 833 + start, len, pages, 834 + nr_dest_pages, out_pages, 835 + total_in, total_out, 836 + max_out); 837 + free_workspace(type, workspace); 838 + return ret; 839 + } 840 + 841 + /* 842 + * pages_in is an array of pages with compressed data. 843 + * 844 + * disk_start is the starting logical offset of this array in the file 845 + * 846 + * bvec is a bio_vec of pages from the file that we want to decompress into 847 + * 848 + * vcnt is the count of pages in the biovec 849 + * 850 + * srclen is the number of bytes in pages_in 851 + * 852 + * The basic idea is that we have a bio that was created by readpages. 853 + * The pages in the bio are for the uncompressed data, and they may not 854 + * be contiguous. They all correspond to the range of bytes covered by 855 + * the compressed extent. 
856 + */ 857 + int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, 858 + struct bio_vec *bvec, int vcnt, size_t srclen) 859 + { 860 + struct list_head *workspace; 861 + int ret; 862 + 863 + workspace = find_workspace(type); 864 + if (IS_ERR(workspace)) 865 + return -ENOMEM; 866 + 867 + ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, 868 + disk_start, 869 + bvec, vcnt, srclen); 870 + free_workspace(type, workspace); 871 + return ret; 872 + } 873 + 874 + /* 875 + * a less complex decompression routine. Our compressed data fits in a 876 + * single page, and we want to read a single page out of it. 877 + * start_byte tells us the offset into the compressed data we're interested in 878 + */ 879 + int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, 880 + unsigned long start_byte, size_t srclen, size_t destlen) 881 + { 882 + struct list_head *workspace; 883 + int ret; 884 + 885 + workspace = find_workspace(type); 886 + if (IS_ERR(workspace)) 887 + return -ENOMEM; 888 + 889 + ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, 890 + dest_page, start_byte, 891 + srclen, destlen); 892 + 893 + free_workspace(type, workspace); 894 + return ret; 895 + } 896 + 897 + void __exit btrfs_exit_compress(void) 898 + { 899 + free_workspaces(); 684 900 }
+48 -18
fs/btrfs/compression.h
··· 19 19 #ifndef __BTRFS_COMPRESSION_ 20 20 #define __BTRFS_COMPRESSION_ 21 21 22 - int btrfs_zlib_decompress(unsigned char *data_in, 23 - struct page *dest_page, 24 - unsigned long start_byte, 25 - size_t srclen, size_t destlen); 26 - int btrfs_zlib_compress_pages(struct address_space *mapping, 27 - u64 start, unsigned long len, 28 - struct page **pages, 29 - unsigned long nr_dest_pages, 30 - unsigned long *out_pages, 31 - unsigned long *total_in, 32 - unsigned long *total_out, 33 - unsigned long max_out); 34 - int btrfs_zlib_decompress_biovec(struct page **pages_in, 35 - u64 disk_start, 36 - struct bio_vec *bvec, 37 - int vcnt, 38 - size_t srclen); 39 - void btrfs_zlib_exit(void); 22 + int btrfs_init_compress(void); 23 + void btrfs_exit_compress(void); 24 + 25 + int btrfs_compress_pages(int type, struct address_space *mapping, 26 + u64 start, unsigned long len, 27 + struct page **pages, 28 + unsigned long nr_dest_pages, 29 + unsigned long *out_pages, 30 + unsigned long *total_in, 31 + unsigned long *total_out, 32 + unsigned long max_out); 33 + int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, 34 + struct bio_vec *bvec, int vcnt, size_t srclen); 35 + int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, 36 + unsigned long start_byte, size_t srclen, size_t destlen); 37 + 40 38 int btrfs_submit_compressed_write(struct inode *inode, u64 start, 41 39 unsigned long len, u64 disk_start, 42 40 unsigned long compressed_len, ··· 42 44 unsigned long nr_pages); 43 45 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 44 46 int mirror_num, unsigned long bio_flags); 47 + 48 + struct btrfs_compress_op { 49 + struct list_head *(*alloc_workspace)(void); 50 + 51 + void (*free_workspace)(struct list_head *workspace); 52 + 53 + int (*compress_pages)(struct list_head *workspace, 54 + struct address_space *mapping, 55 + u64 start, unsigned long len, 56 + struct page **pages, 57 + unsigned long nr_dest_pages, 58 
+ unsigned long *out_pages, 59 + unsigned long *total_in, 60 + unsigned long *total_out, 61 + unsigned long max_out); 62 + 63 + int (*decompress_biovec)(struct list_head *workspace, 64 + struct page **pages_in, 65 + u64 disk_start, 66 + struct bio_vec *bvec, 67 + int vcnt, 68 + size_t srclen); 69 + 70 + int (*decompress)(struct list_head *workspace, 71 + unsigned char *data_in, 72 + struct page *dest_page, 73 + unsigned long start_byte, 74 + size_t srclen, size_t destlen); 75 + }; 76 + 77 + extern struct btrfs_compress_op btrfs_zlib_compress; 78 + 45 79 #endif
+6 -4
fs/btrfs/ctree.h
··· 551 551 } __attribute__ ((__packed__)); 552 552 553 553 enum btrfs_compression_type { 554 - BTRFS_COMPRESS_NONE = 0, 555 - BTRFS_COMPRESS_ZLIB = 1, 556 - BTRFS_COMPRESS_LAST = 2, 554 + BTRFS_COMPRESS_NONE = 0, 555 + BTRFS_COMPRESS_ZLIB = 1, 556 + BTRFS_COMPRESS_TYPES = 1, 557 + BTRFS_COMPRESS_LAST = 2, 557 558 }; 558 559 559 560 struct btrfs_inode_item { ··· 896 895 */ 897 896 u64 last_trans_log_full_commit; 898 897 u64 open_ioctl_trans; 899 - unsigned long mount_opt; 898 + unsigned long mount_opt:20; 899 + unsigned long compress_type:4; 900 900 u64 max_inline; 901 901 u64 alloc_start; 902 902 struct btrfs_transaction *running_transaction;
+4 -1
fs/btrfs/extent_io.c
··· 2028 2028 BUG_ON(extent_map_end(em) <= cur); 2029 2029 BUG_ON(end < cur); 2030 2030 2031 - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2031 + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 2032 2032 this_bio_flag = EXTENT_BIO_COMPRESSED; 2033 + extent_set_compress_type(&this_bio_flag, 2034 + em->compress_type); 2035 + } 2033 2036 2034 2037 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2035 2038 cur_end = min(extent_map_end(em) - 1, end);
+16 -1
fs/btrfs/extent_io.h
··· 20 20 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 21 21 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 22 22 23 - /* flags for bio submission */ 23 + /* 24 + * flags for bio submission. The high bits indicate the compression 25 + * type for this bio 26 + */ 24 27 #define EXTENT_BIO_COMPRESSED 1 28 + #define EXTENT_BIO_FLAG_SHIFT 16 25 29 26 30 /* these are bit numbers for test/set bit */ 27 31 #define EXTENT_BUFFER_UPTODATE 0 ··· 138 134 */ 139 135 wait_queue_head_t lock_wq; 140 136 }; 137 + 138 + static inline void extent_set_compress_type(unsigned long *bio_flags, 139 + int compress_type) 140 + { 141 + *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; 142 + } 143 + 144 + static inline int extent_compress_type(unsigned long bio_flags) 145 + { 146 + return bio_flags >> EXTENT_BIO_FLAG_SHIFT; 147 + } 141 148 142 149 struct extent_map_tree; 143 150
+2
fs/btrfs/extent_map.c
··· 3 3 #include <linux/module.h> 4 4 #include <linux/spinlock.h> 5 5 #include <linux/hardirq.h> 6 + #include "ctree.h" 6 7 #include "extent_map.h" 7 8 8 9 ··· 55 54 return em; 56 55 em->in_tree = 0; 57 56 em->flags = 0; 57 + em->compress_type = BTRFS_COMPRESS_NONE; 58 58 atomic_set(&em->refs, 1); 59 59 return em; 60 60 }
+2 -1
fs/btrfs/extent_map.h
··· 26 26 unsigned long flags; 27 27 struct block_device *bdev; 28 28 atomic_t refs; 29 - int in_tree; 29 + unsigned int in_tree:1; 30 + unsigned int compress_type:4; 30 31 }; 31 32 32 33 struct extent_map_tree {
+2
fs/btrfs/file.c
··· 224 224 225 225 split->bdev = em->bdev; 226 226 split->flags = flags; 227 + split->compress_type = em->compress_type; 227 228 ret = add_extent_mapping(em_tree, split); 228 229 BUG_ON(ret); 229 230 free_extent_map(split); ··· 239 238 split->len = em->start + em->len - (start + len); 240 239 split->bdev = em->bdev; 241 240 split->flags = flags; 241 + split->compress_type = em->compress_type; 242 242 243 243 if (compressed) { 244 244 split->block_len = em->block_len;
+51 -31
fs/btrfs/inode.c
··· 122 122 size_t cur_size = size; 123 123 size_t datasize; 124 124 unsigned long offset; 125 - int use_compress = 0; 125 + int compress_type = BTRFS_COMPRESS_NONE; 126 126 127 127 if (compressed_size && compressed_pages) { 128 - use_compress = 1; 128 + compress_type = root->fs_info->compress_type; 129 129 cur_size = compressed_size; 130 130 } 131 131 ··· 159 159 btrfs_set_file_extent_ram_bytes(leaf, ei, size); 160 160 ptr = btrfs_file_extent_inline_start(ei); 161 161 162 - if (use_compress) { 162 + if (compress_type != BTRFS_COMPRESS_NONE) { 163 163 struct page *cpage; 164 164 int i = 0; 165 165 while (compressed_size > 0) { ··· 176 176 compressed_size -= cur_size; 177 177 } 178 178 btrfs_set_file_extent_compression(leaf, ei, 179 - BTRFS_COMPRESS_ZLIB); 179 + compress_type); 180 180 } else { 181 181 page = find_get_page(inode->i_mapping, 182 182 start >> PAGE_CACHE_SHIFT); ··· 263 263 u64 compressed_size; 264 264 struct page **pages; 265 265 unsigned long nr_pages; 266 + int compress_type; 266 267 struct list_head list; 267 268 }; 268 269 ··· 281 280 u64 start, u64 ram_size, 282 281 u64 compressed_size, 283 282 struct page **pages, 284 - unsigned long nr_pages) 283 + unsigned long nr_pages, 284 + int compress_type) 285 285 { 286 286 struct async_extent *async_extent; 287 287 ··· 292 290 async_extent->compressed_size = compressed_size; 293 291 async_extent->pages = pages; 294 292 async_extent->nr_pages = nr_pages; 293 + async_extent->compress_type = compress_type; 295 294 list_add_tail(&async_extent->list, &cow->extents); 296 295 return 0; 297 296 } ··· 335 332 unsigned long max_uncompressed = 128 * 1024; 336 333 int i; 337 334 int will_compress; 335 + int compress_type = root->fs_info->compress_type; 338 336 339 337 actual_end = min_t(u64, isize, end + 1); 340 338 again: ··· 385 381 WARN_ON(pages); 386 382 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 387 383 388 - ret = btrfs_zlib_compress_pages(inode->i_mapping, start, 389 - total_compressed, 
pages, 390 - nr_pages, &nr_pages_ret, 391 - &total_in, 392 - &total_compressed, 393 - max_compressed); 384 + if (BTRFS_I(inode)->force_compress) 385 + compress_type = BTRFS_I(inode)->force_compress; 386 + 387 + ret = btrfs_compress_pages(compress_type, 388 + inode->i_mapping, start, 389 + total_compressed, pages, 390 + nr_pages, &nr_pages_ret, 391 + &total_in, 392 + &total_compressed, 393 + max_compressed); 394 394 395 395 if (!ret) { 396 396 unsigned long offset = total_compressed & ··· 501 493 * and will submit them to the elevator. 502 494 */ 503 495 add_async_extent(async_cow, start, num_bytes, 504 - total_compressed, pages, nr_pages_ret); 496 + total_compressed, pages, nr_pages_ret, 497 + compress_type); 505 498 506 499 if (start + num_bytes < end) { 507 500 start += num_bytes; ··· 524 515 __set_page_dirty_nobuffers(locked_page); 525 516 /* unlocked later on in the async handlers */ 526 517 } 527 - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); 518 + add_async_extent(async_cow, start, end - start + 1, 519 + 0, NULL, 0, BTRFS_COMPRESS_NONE); 528 520 *num_added += 1; 529 521 } 530 522 ··· 650 640 em->block_start = ins.objectid; 651 641 em->block_len = ins.offset; 652 642 em->bdev = root->fs_info->fs_devices->latest_bdev; 643 + em->compress_type = async_extent->compress_type; 653 644 set_bit(EXTENT_FLAG_PINNED, &em->flags); 654 645 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 655 646 ··· 667 656 async_extent->ram_size - 1, 0); 668 657 } 669 658 670 - ret = btrfs_add_ordered_extent(inode, async_extent->start, 671 - ins.objectid, 672 - async_extent->ram_size, 673 - ins.offset, 674 - BTRFS_ORDERED_COMPRESSED); 659 + ret = btrfs_add_ordered_extent_compress(inode, 660 + async_extent->start, 661 + ins.objectid, 662 + async_extent->ram_size, 663 + ins.offset, 664 + BTRFS_ORDERED_COMPRESSED, 665 + async_extent->compress_type); 675 666 BUG_ON(ret); 676 667 677 668 /* ··· 1683 1670 struct btrfs_ordered_extent *ordered_extent = NULL; 1684 1671 struct 
extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1685 1672 struct extent_state *cached_state = NULL; 1686 - int compressed = 0; 1673 + int compress_type = 0; 1687 1674 int ret; 1688 1675 bool nolock = false; 1689 1676 ··· 1724 1711 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1725 1712 1726 1713 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1727 - compressed = 1; 1714 + compress_type = ordered_extent->compress_type; 1728 1715 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1729 - BUG_ON(compressed); 1716 + BUG_ON(compress_type); 1730 1717 ret = btrfs_mark_extent_written(trans, inode, 1731 1718 ordered_extent->file_offset, 1732 1719 ordered_extent->file_offset + ··· 1740 1727 ordered_extent->disk_len, 1741 1728 ordered_extent->len, 1742 1729 ordered_extent->len, 1743 - compressed, 0, 0, 1730 + compress_type, 0, 0, 1744 1731 BTRFS_FILE_EXTENT_REG); 1745 1732 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1746 1733 ordered_extent->file_offset, ··· 1842 1829 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 1843 1830 logical = em->block_start; 1844 1831 failrec->bio_flags = EXTENT_BIO_COMPRESSED; 1832 + extent_set_compress_type(&failrec->bio_flags, 1833 + em->compress_type); 1845 1834 } 1846 1835 failrec->logical = logical; 1847 1836 free_extent_map(em); ··· 4945 4930 size_t max_size; 4946 4931 unsigned long inline_size; 4947 4932 unsigned long ptr; 4933 + int compress_type; 4948 4934 4949 4935 WARN_ON(pg_offset != 0); 4936 + compress_type = btrfs_file_extent_compression(leaf, item); 4950 4937 max_size = btrfs_file_extent_ram_bytes(leaf, item); 4951 4938 inline_size = btrfs_file_extent_inline_item_len(leaf, 4952 4939 btrfs_item_nr(leaf, path->slots[0])); ··· 4958 4941 read_extent_buffer(leaf, tmp, ptr, inline_size); 4959 4942 4960 4943 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); 4961 - ret = btrfs_zlib_decompress(tmp, page, extent_offset, 4962 - inline_size, max_size); 4944 + ret = 
btrfs_decompress(compress_type, tmp, page, 4945 + extent_offset, inline_size, max_size); 4963 4946 if (ret) { 4964 4947 char *kaddr = kmap_atomic(page, KM_USER0); 4965 4948 unsigned long copy_size = min_t(u64, ··· 5001 4984 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5002 4985 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5003 4986 struct btrfs_trans_handle *trans = NULL; 5004 - int compressed; 4987 + int compress_type; 5005 4988 5006 4989 again: 5007 4990 read_lock(&em_tree->lock); ··· 5060 5043 5061 5044 found_type = btrfs_file_extent_type(leaf, item); 5062 5045 extent_start = found_key.offset; 5063 - compressed = btrfs_file_extent_compression(leaf, item); 5046 + compress_type = btrfs_file_extent_compression(leaf, item); 5064 5047 if (found_type == BTRFS_FILE_EXTENT_REG || 5065 5048 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5066 5049 extent_end = extent_start + ··· 5106 5089 em->block_start = EXTENT_MAP_HOLE; 5107 5090 goto insert; 5108 5091 } 5109 - if (compressed) { 5092 + if (compress_type != BTRFS_COMPRESS_NONE) { 5110 5093 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5094 + em->compress_type = compress_type; 5111 5095 em->block_start = bytenr; 5112 5096 em->block_len = btrfs_file_extent_disk_num_bytes(leaf, 5113 5097 item); ··· 5142 5124 em->len = (copy_size + root->sectorsize - 1) & 5143 5125 ~((u64)root->sectorsize - 1); 5144 5126 em->orig_start = EXTENT_MAP_INLINE; 5145 - if (compressed) 5127 + if (compress_type) { 5146 5128 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5129 + em->compress_type = compress_type; 5130 + } 5147 5131 ptr = btrfs_file_extent_inline_start(item) + extent_offset; 5148 5132 if (create == 0 && !PageUptodate(page)) { 5149 - if (btrfs_file_extent_compression(leaf, item) == 5150 - BTRFS_COMPRESS_ZLIB) { 5133 + if (btrfs_file_extent_compression(leaf, item) != 5134 + BTRFS_COMPRESS_NONE) { 5151 5135 ret = uncompress_inline(path, inode, page, 5152 5136 pg_offset, 5153 5137 extent_offset, item); ··· 6499 
6479 ei->ordered_data_close = 0; 6500 6480 ei->orphan_meta_reserved = 0; 6501 6481 ei->dummy_inode = 0; 6502 - ei->force_compress = 0; 6482 + ei->force_compress = BTRFS_COMPRESS_NONE; 6503 6483 6504 6484 inode = &ei->vfs_inode; 6505 6485 extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
+2 -2
fs/btrfs/ioctl.c
··· 683 683 total_read++; 684 684 mutex_lock(&inode->i_mutex); 685 685 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 686 - BTRFS_I(inode)->force_compress = 1; 686 + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_ZLIB; 687 687 688 688 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 689 689 if (ret) ··· 781 781 atomic_dec(&root->fs_info->async_submit_draining); 782 782 783 783 mutex_lock(&inode->i_mutex); 784 - BTRFS_I(inode)->force_compress = 0; 784 + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 785 785 mutex_unlock(&inode->i_mutex); 786 786 } 787 787
+15 -3
fs/btrfs/ordered-data.c
··· 172 172 */ 173 173 static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 174 174 u64 start, u64 len, u64 disk_len, 175 - int type, int dio) 175 + int type, int dio, int compress_type) 176 176 { 177 177 struct btrfs_ordered_inode_tree *tree; 178 178 struct rb_node *node; ··· 189 189 entry->disk_len = disk_len; 190 190 entry->bytes_left = len; 191 191 entry->inode = inode; 192 + entry->compress_type = compress_type; 192 193 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 193 194 set_bit(type, &entry->flags); 194 195 ··· 221 220 u64 start, u64 len, u64 disk_len, int type) 222 221 { 223 222 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 224 - disk_len, type, 0); 223 + disk_len, type, 0, 224 + BTRFS_COMPRESS_NONE); 225 225 } 226 226 227 227 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 228 228 u64 start, u64 len, u64 disk_len, int type) 229 229 { 230 230 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 231 - disk_len, type, 1); 231 + disk_len, type, 1, 232 + BTRFS_COMPRESS_NONE); 233 + } 234 + 235 + int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 236 + u64 start, u64 len, u64 disk_len, 237 + int type, int compress_type) 238 + { 239 + return __btrfs_add_ordered_extent(inode, file_offset, start, len, 240 + disk_len, type, 0, 241 + compress_type); 232 242 } 233 243 234 244 /*
+7 -1
fs/btrfs/ordered-data.h
··· 68 68 69 69 #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ 70 70 71 - #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ 71 + #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ 72 72 73 73 #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ 74 74 ··· 92 92 93 93 /* flags (described above) */ 94 94 unsigned long flags; 95 + 96 + /* compression algorithm */ 97 + int compress_type; 95 98 96 99 /* reference count */ 97 100 atomic_t refs; ··· 151 148 u64 start, u64 len, u64 disk_len, int type); 152 149 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 153 150 u64 start, u64 len, u64 disk_len, int type); 151 + int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 152 + u64 start, u64 len, u64 disk_len, 153 + int type, int compress_type); 154 154 int btrfs_add_ordered_sum(struct inode *inode, 155 155 struct btrfs_ordered_extent *entry, 156 156 struct btrfs_ordered_sum *sum);
+36 -11
fs/btrfs/super.c
··· 69 69 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, 70 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 71 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 72 - Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 73 - Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, 74 - Opt_user_subvol_rm_allowed, 72 + Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 73 + Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 74 + Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 75 75 }; 76 76 77 77 static match_table_t tokens = { ··· 86 86 {Opt_alloc_start, "alloc_start=%s"}, 87 87 {Opt_thread_pool, "thread_pool=%d"}, 88 88 {Opt_compress, "compress"}, 89 + {Opt_compress_type, "compress=%s"}, 89 90 {Opt_compress_force, "compress-force"}, 91 + {Opt_compress_force_type, "compress-force=%s"}, 90 92 {Opt_ssd, "ssd"}, 91 93 {Opt_ssd_spread, "ssd_spread"}, 92 94 {Opt_nossd, "nossd"}, ··· 114 112 char *p, *num, *orig; 115 113 int intarg; 116 114 int ret = 0; 115 + char *compress_type; 116 + bool compress_force = false; 117 117 118 118 if (!options) 119 119 return 0; ··· 158 154 btrfs_set_opt(info->mount_opt, NODATACOW); 159 155 btrfs_set_opt(info->mount_opt, NODATASUM); 160 156 break; 161 - case Opt_compress: 162 - printk(KERN_INFO "btrfs: use compression\n"); 163 - btrfs_set_opt(info->mount_opt, COMPRESS); 164 - break; 165 157 case Opt_compress_force: 166 - printk(KERN_INFO "btrfs: forcing compression\n"); 167 - btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 158 + case Opt_compress_force_type: 159 + compress_force = true; 160 + case Opt_compress: 161 + case Opt_compress_type: 162 + if (token == Opt_compress || 163 + token == Opt_compress_force || 164 + strcmp(args[0].from, "zlib") == 0) { 165 + compress_type = "zlib"; 166 + info->compress_type = BTRFS_COMPRESS_ZLIB; 167 + } else { 168 + ret = -EINVAL; 169 + goto out; 170 + } 171 + 168 172 
btrfs_set_opt(info->mount_opt, COMPRESS); 173 + if (compress_force) { 174 + btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 175 + pr_info("btrfs: force %s compression\n", 176 + compress_type); 177 + } else 178 + pr_info("btrfs: use %s compression\n", 179 + compress_type); 169 180 break; 170 181 case Opt_ssd: 171 182 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); ··· 917 898 if (err) 918 899 return err; 919 900 920 - err = btrfs_init_cachep(); 901 + err = btrfs_init_compress(); 921 902 if (err) 922 903 goto free_sysfs; 904 + 905 + err = btrfs_init_cachep(); 906 + if (err) 907 + goto free_compress; 923 908 924 909 err = extent_io_init(); 925 910 if (err) ··· 952 929 extent_io_exit(); 953 930 free_cachep: 954 931 btrfs_destroy_cachep(); 932 + free_compress: 933 + btrfs_exit_compress(); 955 934 free_sysfs: 956 935 btrfs_exit_sysfs(); 957 936 return err; ··· 968 943 unregister_filesystem(&btrfs_fs_type); 969 944 btrfs_exit_sysfs(); 970 945 btrfs_cleanup_fs_uuids(); 971 - btrfs_zlib_exit(); 946 + btrfs_exit_compress(); 972 947 } 973 948 974 949 module_init(init_btrfs_fs)
+51 -204
fs/btrfs/zlib.c
··· 32 32 #include <linux/bio.h> 33 33 #include "compression.h" 34 34 35 - /* Plan: call deflate() with avail_in == *sourcelen, 36 - avail_out = *dstlen - 12 and flush == Z_FINISH. 37 - If it doesn't manage to finish, call it again with 38 - avail_in == 0 and avail_out set to the remaining 12 39 - bytes for it to clean up. 40 - Q: Is 12 bytes sufficient? 41 - */ 42 - #define STREAM_END_SPACE 12 43 - 44 35 struct workspace { 45 36 z_stream inf_strm; 46 37 z_stream def_strm; ··· 39 48 struct list_head list; 40 49 }; 41 50 42 - static LIST_HEAD(idle_workspace); 43 - static DEFINE_SPINLOCK(workspace_lock); 44 - static unsigned long num_workspace; 45 - static atomic_t alloc_workspace = ATOMIC_INIT(0); 46 - static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); 47 - 48 - /* 49 - * this finds an available zlib workspace or allocates a new one 50 - * NULL or an ERR_PTR is returned if things go bad. 51 - */ 52 - static struct workspace *find_zlib_workspace(void) 51 + static void zlib_free_workspace(struct list_head *ws) 53 52 { 54 - struct workspace *workspace; 55 - int ret; 56 - int cpus = num_online_cpus(); 53 + struct workspace *workspace = list_entry(ws, struct workspace, list); 57 54 58 - again: 59 - spin_lock(&workspace_lock); 60 - if (!list_empty(&idle_workspace)) { 61 - workspace = list_entry(idle_workspace.next, struct workspace, 62 - list); 63 - list_del(&workspace->list); 64 - num_workspace--; 65 - spin_unlock(&workspace_lock); 66 - return workspace; 67 - 68 - } 69 - if (atomic_read(&alloc_workspace) > cpus) { 70 - DEFINE_WAIT(wait); 71 - 72 - spin_unlock(&workspace_lock); 73 - prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); 74 - if (atomic_read(&alloc_workspace) > cpus && !num_workspace) 75 - schedule(); 76 - finish_wait(&workspace_wait, &wait); 77 - goto again; 78 - } 79 - atomic_inc(&alloc_workspace); 80 - spin_unlock(&workspace_lock); 81 - 82 - workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 83 - if (!workspace) { 84 - ret = -ENOMEM; 85 - goto 
fail; 86 - } 87 - 88 - workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 89 - if (!workspace->def_strm.workspace) { 90 - ret = -ENOMEM; 91 - goto fail; 92 - } 93 - workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 94 - if (!workspace->inf_strm.workspace) { 95 - ret = -ENOMEM; 96 - goto fail_inflate; 97 - } 98 - workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 99 - if (!workspace->buf) { 100 - ret = -ENOMEM; 101 - goto fail_kmalloc; 102 - } 103 - return workspace; 104 - 105 - fail_kmalloc: 106 - vfree(workspace->inf_strm.workspace); 107 - fail_inflate: 108 - vfree(workspace->def_strm.workspace); 109 - fail: 110 - kfree(workspace); 111 - atomic_dec(&alloc_workspace); 112 - wake_up(&workspace_wait); 113 - return ERR_PTR(ret); 114 - } 115 - 116 - /* 117 - * put a workspace struct back on the list or free it if we have enough 118 - * idle ones sitting around 119 - */ 120 - static int free_workspace(struct workspace *workspace) 121 - { 122 - spin_lock(&workspace_lock); 123 - if (num_workspace < num_online_cpus()) { 124 - list_add_tail(&workspace->list, &idle_workspace); 125 - num_workspace++; 126 - spin_unlock(&workspace_lock); 127 - if (waitqueue_active(&workspace_wait)) 128 - wake_up(&workspace_wait); 129 - return 0; 130 - } 131 - spin_unlock(&workspace_lock); 132 55 vfree(workspace->def_strm.workspace); 133 56 vfree(workspace->inf_strm.workspace); 134 57 kfree(workspace->buf); 135 58 kfree(workspace); 136 - 137 - atomic_dec(&alloc_workspace); 138 - if (waitqueue_active(&workspace_wait)) 139 - wake_up(&workspace_wait); 140 - return 0; 141 59 } 142 60 143 - /* 144 - * cleanup function for module exit 145 - */ 146 - static void free_workspaces(void) 61 + static struct list_head *zlib_alloc_workspace(void) 147 62 { 148 63 struct workspace *workspace; 149 - while (!list_empty(&idle_workspace)) { 150 - workspace = list_entry(idle_workspace.next, struct workspace, 151 - list); 152 - list_del(&workspace->list); 153 - 
vfree(workspace->def_strm.workspace); 154 - vfree(workspace->inf_strm.workspace); 155 - kfree(workspace->buf); 156 - kfree(workspace); 157 - atomic_dec(&alloc_workspace); 158 - } 64 + 65 + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 66 + if (!workspace) 67 + return ERR_PTR(-ENOMEM); 68 + 69 + workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 70 + workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 71 + workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 72 + if (!workspace->def_strm.workspace || 73 + !workspace->inf_strm.workspace || !workspace->buf) 74 + goto fail; 75 + 76 + INIT_LIST_HEAD(&workspace->list); 77 + 78 + return &workspace->list; 79 + fail: 80 + zlib_free_workspace(&workspace->list); 81 + return ERR_PTR(-ENOMEM); 159 82 } 160 83 161 - /* 162 - * given an address space and start/len, compress the bytes. 163 - * 164 - * pages are allocated to hold the compressed result and stored 165 - * in 'pages' 166 - * 167 - * out_pages is used to return the number of pages allocated. There 168 - * may be pages allocated even if we return an error 169 - * 170 - * total_in is used to return the number of bytes actually read. It 171 - * may be smaller then len if we had to exit early because we 172 - * ran out of room in the pages array or because we cross the 173 - * max_out threshold. 
174 - * 175 - * total_out is used to return the total number of compressed bytes 176 - * 177 - * max_out tells us the max number of bytes that we're allowed to 178 - * stuff into pages 179 - */ 180 - int btrfs_zlib_compress_pages(struct address_space *mapping, 181 - u64 start, unsigned long len, 182 - struct page **pages, 183 - unsigned long nr_dest_pages, 184 - unsigned long *out_pages, 185 - unsigned long *total_in, 186 - unsigned long *total_out, 187 - unsigned long max_out) 84 + static int zlib_compress_pages(struct list_head *ws, 85 + struct address_space *mapping, 86 + u64 start, unsigned long len, 87 + struct page **pages, 88 + unsigned long nr_dest_pages, 89 + unsigned long *out_pages, 90 + unsigned long *total_in, 91 + unsigned long *total_out, 92 + unsigned long max_out) 188 93 { 94 + struct workspace *workspace = list_entry(ws, struct workspace, list); 189 95 int ret; 190 - struct workspace *workspace; 191 96 char *data_in; 192 97 char *cpage_out; 193 98 int nr_pages = 0; ··· 94 207 *out_pages = 0; 95 208 *total_out = 0; 96 209 *total_in = 0; 97 - 98 - workspace = find_zlib_workspace(); 99 - if (IS_ERR(workspace)) 100 - return -1; 101 210 102 211 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 103 212 printk(KERN_WARNING "deflateInit failed\n"); ··· 208 325 kunmap(in_page); 209 326 page_cache_release(in_page); 210 327 } 211 - free_workspace(workspace); 212 328 return ret; 213 329 } 214 330 215 - /* 216 - * pages_in is an array of pages with compressed data. 217 - * 218 - * disk_start is the starting logical offset of this array in the file 219 - * 220 - * bvec is a bio_vec of pages from the file that we want to decompress into 221 - * 222 - * vcnt is the count of pages in the biovec 223 - * 224 - * srclen is the number of bytes in pages_in 225 - * 226 - * The basic idea is that we have a bio that was created by readpages. 227 - * The pages in the bio are for the uncompressed data, and they may not 228 - * be contiguous. 
They all correspond to the range of bytes covered by 229 - * the compressed extent. 230 - */ 231 - int btrfs_zlib_decompress_biovec(struct page **pages_in, 232 - u64 disk_start, 233 - struct bio_vec *bvec, 234 - int vcnt, 235 - size_t srclen) 331 + static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, 332 + u64 disk_start, 333 + struct bio_vec *bvec, 334 + int vcnt, 335 + size_t srclen) 236 336 { 337 + struct workspace *workspace = list_entry(ws, struct workspace, list); 237 338 int ret = 0; 238 339 int wbits = MAX_WBITS; 239 - struct workspace *workspace; 240 340 char *data_in; 241 341 size_t total_out = 0; 242 342 unsigned long page_bytes_left; ··· 236 370 unsigned long start_byte; 237 371 unsigned long current_buf_start; 238 372 char *kaddr; 239 - 240 - workspace = find_zlib_workspace(); 241 - if (IS_ERR(workspace)) 242 - return -ENOMEM; 243 373 244 374 data_in = kmap(pages_in[page_in_index]); 245 375 workspace->inf_strm.next_in = data_in; ··· 262 400 263 401 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 264 402 printk(KERN_WARNING "inflateInit failed\n"); 265 - ret = -1; 266 - goto out; 403 + return -1; 267 404 } 268 405 while (workspace->inf_strm.total_in < srclen) { 269 406 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); ··· 388 527 zlib_inflateEnd(&workspace->inf_strm); 389 528 if (data_in) 390 529 kunmap(pages_in[page_in_index]); 391 - out: 392 - free_workspace(workspace); 393 530 return ret; 394 531 } 395 532 396 - /* 397 - * a less complex decompression routine. Our compressed data fits in a 398 - * single page, and we want to read a single page out of it. 
399 - * start_byte tells us the offset into the compressed data we're interested in 400 - */ 401 - int btrfs_zlib_decompress(unsigned char *data_in, 402 - struct page *dest_page, 403 - unsigned long start_byte, 404 - size_t srclen, size_t destlen) 533 + static int zlib_decompress(struct list_head *ws, unsigned char *data_in, 534 + struct page *dest_page, 535 + unsigned long start_byte, 536 + size_t srclen, size_t destlen) 405 537 { 538 + struct workspace *workspace = list_entry(ws, struct workspace, list); 406 539 int ret = 0; 407 540 int wbits = MAX_WBITS; 408 - struct workspace *workspace; 409 541 unsigned long bytes_left = destlen; 410 542 unsigned long total_out = 0; 411 543 char *kaddr; 412 - 413 - if (destlen > PAGE_CACHE_SIZE) 414 - return -ENOMEM; 415 - 416 - workspace = find_zlib_workspace(); 417 - if (IS_ERR(workspace)) 418 - return -ENOMEM; 419 544 420 545 workspace->inf_strm.next_in = data_in; 421 546 workspace->inf_strm.avail_in = srclen; ··· 423 576 424 577 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 425 578 printk(KERN_WARNING "inflateInit failed\n"); 426 - ret = -1; 427 - goto out; 579 + return -1; 428 580 } 429 581 430 582 while (bytes_left > 0) { ··· 473 627 ret = 0; 474 628 475 629 zlib_inflateEnd(&workspace->inf_strm); 476 - out: 477 - free_workspace(workspace); 478 630 return ret; 479 631 } 480 632 481 - void btrfs_zlib_exit(void) 482 - { 483 - free_workspaces(); 484 - } 633 + struct btrfs_compress_op btrfs_zlib_compress = { 634 + .alloc_workspace = zlib_alloc_workspace, 635 + .free_workspace = zlib_free_workspace, 636 + .compress_pages = zlib_compress_pages, 637 + .decompress_biovec = zlib_decompress_biovec, 638 + .decompress = zlib_decompress, 639 + };