Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (25 commits)
Btrfs: forced readonly mounts on errors
btrfs: Require CAP_SYS_ADMIN for filesystem rebalance
Btrfs: don't warn if we get ENOSPC in btrfs_block_rsv_check
btrfs: Fix memory leak in btrfs_read_fs_root_no_radix()
btrfs: check NULL or not
btrfs: Don't pass NULL ptr to func that may deref it.
btrfs: mount failure return value fix
btrfs: Mem leak in btrfs_get_acl()
btrfs: fix wrong free space information of btrfs
btrfs: make the chunk allocator utilize the devices better
btrfs: restructure find_free_dev_extent()
btrfs: fix wrong calculation of stripe size
btrfs: try to reclaim some space when chunk allocation fails
btrfs: fix wrong data space statistics
fs/btrfs: Fix build of ctree
Btrfs: fix off by one while setting block groups readonly
Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls
Btrfs: Add readonly snapshots support
Btrfs: Refactor btrfs_ioctl_snap_create()
btrfs: Extract duplicate decompress code
...

+2506 -639
+2
fs/btrfs/Kconfig
···
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
+1 -1
fs/btrfs/Makefile
···
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o \
+	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o
+3 -1
fs/btrfs/acl.c
···
 	size = __btrfs_getxattr(inode, name, value, size);
 	if (size > 0) {
 		acl = posix_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
+		if (IS_ERR(acl)) {
+			kfree(value);
 			return acl;
+		}
 		set_cached_acl(inode, type, acl);
 	}
 	kfree(value);
+1 -1
fs/btrfs/btrfs_inode.h
···
 	/*
 	 * always compress this one file
 	 */
-	unsigned force_compress:1;
+	unsigned force_compress:4;
 
 	struct inode vfs_inode;
 };
+324 -5
fs/btrfs/compression.c
··· 62 /* number of bytes on disk */ 63 unsigned long compressed_len; 64 65 /* number of compressed pages in the array */ 66 unsigned long nr_pages; 67 ··· 176 /* ok, we're the last bio for this extent, lets start 177 * the decompression. 178 */ 179 - ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 180 - cb->start, 181 - cb->orig_bio->bi_io_vec, 182 - cb->orig_bio->bi_vcnt, 183 - cb->compressed_len); 184 csum_failed: 185 if (ret) 186 cb->errors = 1; ··· 592 593 cb->len = uncompressed_len; 594 cb->compressed_len = compressed_len; 595 cb->orig_bio = bio; 596 597 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / ··· 681 682 bio_put(comp_bio); 683 return 0; 684 }
··· 62 /* number of bytes on disk */ 63 unsigned long compressed_len; 64 65 + /* the compression algorithm for this bio */ 66 + int compress_type; 67 + 68 /* number of compressed pages in the array */ 69 unsigned long nr_pages; 70 ··· 173 /* ok, we're the last bio for this extent, lets start 174 * the decompression. 175 */ 176 + ret = btrfs_decompress_biovec(cb->compress_type, 177 + cb->compressed_pages, 178 + cb->start, 179 + cb->orig_bio->bi_io_vec, 180 + cb->orig_bio->bi_vcnt, 181 + cb->compressed_len); 182 csum_failed: 183 if (ret) 184 cb->errors = 1; ··· 588 589 cb->len = uncompressed_len; 590 cb->compressed_len = compressed_len; 591 + cb->compress_type = extent_compress_type(bio_flags); 592 cb->orig_bio = bio; 593 594 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / ··· 676 677 bio_put(comp_bio); 678 return 0; 679 + } 680 + 681 + static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; 682 + static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; 683 + static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; 684 + static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; 685 + static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; 686 + 687 + struct btrfs_compress_op *btrfs_compress_op[] = { 688 + &btrfs_zlib_compress, 689 + &btrfs_lzo_compress, 690 + }; 691 + 692 + int __init btrfs_init_compress(void) 693 + { 694 + int i; 695 + 696 + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { 697 + INIT_LIST_HEAD(&comp_idle_workspace[i]); 698 + spin_lock_init(&comp_workspace_lock[i]); 699 + atomic_set(&comp_alloc_workspace[i], 0); 700 + init_waitqueue_head(&comp_workspace_wait[i]); 701 + } 702 + return 0; 703 + } 704 + 705 + /* 706 + * this finds an available workspace or allocates a new one 707 + * ERR_PTR is returned if things go bad. 
708 + */ 709 + static struct list_head *find_workspace(int type) 710 + { 711 + struct list_head *workspace; 712 + int cpus = num_online_cpus(); 713 + int idx = type - 1; 714 + 715 + struct list_head *idle_workspace = &comp_idle_workspace[idx]; 716 + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; 717 + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; 718 + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; 719 + int *num_workspace = &comp_num_workspace[idx]; 720 + again: 721 + spin_lock(workspace_lock); 722 + if (!list_empty(idle_workspace)) { 723 + workspace = idle_workspace->next; 724 + list_del(workspace); 725 + (*num_workspace)--; 726 + spin_unlock(workspace_lock); 727 + return workspace; 728 + 729 + } 730 + if (atomic_read(alloc_workspace) > cpus) { 731 + DEFINE_WAIT(wait); 732 + 733 + spin_unlock(workspace_lock); 734 + prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); 735 + if (atomic_read(alloc_workspace) > cpus && !*num_workspace) 736 + schedule(); 737 + finish_wait(workspace_wait, &wait); 738 + goto again; 739 + } 740 + atomic_inc(alloc_workspace); 741 + spin_unlock(workspace_lock); 742 + 743 + workspace = btrfs_compress_op[idx]->alloc_workspace(); 744 + if (IS_ERR(workspace)) { 745 + atomic_dec(alloc_workspace); 746 + wake_up(workspace_wait); 747 + } 748 + return workspace; 749 + } 750 + 751 + /* 752 + * put a workspace struct back on the list or free it if we have enough 753 + * idle ones sitting around 754 + */ 755 + static void free_workspace(int type, struct list_head *workspace) 756 + { 757 + int idx = type - 1; 758 + struct list_head *idle_workspace = &comp_idle_workspace[idx]; 759 + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; 760 + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; 761 + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; 762 + int *num_workspace = &comp_num_workspace[idx]; 763 + 764 + spin_lock(workspace_lock); 765 + if (*num_workspace < num_online_cpus()) { 766 + list_add_tail(workspace, idle_workspace); 767 + (*num_workspace)++; 768 + spin_unlock(workspace_lock); 769 + goto wake; 770 + } 771 + spin_unlock(workspace_lock); 772 + 773 + btrfs_compress_op[idx]->free_workspace(workspace); 774 + atomic_dec(alloc_workspace); 775 + wake: 776 + if (waitqueue_active(workspace_wait)) 777 + wake_up(workspace_wait); 778 + } 779 + 780 + /* 781 + * cleanup function for module exit 782 + */ 783 + static void free_workspaces(void) 784 + { 785 + struct list_head *workspace; 786 + int i; 787 + 788 + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { 789 + while (!list_empty(&comp_idle_workspace[i])) { 790 + workspace = comp_idle_workspace[i].next; 791 + list_del(workspace); 792 + btrfs_compress_op[i]->free_workspace(workspace); 793 + atomic_dec(&comp_alloc_workspace[i]); 794 + } 795 + } 796 + } 797 + 798 + /* 799 + * given an address space and start/len, compress the bytes. 800 + * 801 + * pages are allocated to hold the compressed result and stored 802 + * in 'pages' 803 + * 804 + * out_pages is used to return the number of pages allocated. There 805 + * may be pages allocated even if we return an error 806 + * 807 + * total_in is used to return the number of bytes actually read. It 808 + * may be smaller then len if we had to exit early because we 809 + * ran out of room in the pages array or because we cross the 810 + * max_out threshold. 
811 + * 812 + * total_out is used to return the total number of compressed bytes 813 + * 814 + * max_out tells us the max number of bytes that we're allowed to 815 + * stuff into pages 816 + */ 817 + int btrfs_compress_pages(int type, struct address_space *mapping, 818 + u64 start, unsigned long len, 819 + struct page **pages, 820 + unsigned long nr_dest_pages, 821 + unsigned long *out_pages, 822 + unsigned long *total_in, 823 + unsigned long *total_out, 824 + unsigned long max_out) 825 + { 826 + struct list_head *workspace; 827 + int ret; 828 + 829 + workspace = find_workspace(type); 830 + if (IS_ERR(workspace)) 831 + return -1; 832 + 833 + ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, 834 + start, len, pages, 835 + nr_dest_pages, out_pages, 836 + total_in, total_out, 837 + max_out); 838 + free_workspace(type, workspace); 839 + return ret; 840 + } 841 + 842 + /* 843 + * pages_in is an array of pages with compressed data. 844 + * 845 + * disk_start is the starting logical offset of this array in the file 846 + * 847 + * bvec is a bio_vec of pages from the file that we want to decompress into 848 + * 849 + * vcnt is the count of pages in the biovec 850 + * 851 + * srclen is the number of bytes in pages_in 852 + * 853 + * The basic idea is that we have a bio that was created by readpages. 854 + * The pages in the bio are for the uncompressed data, and they may not 855 + * be contiguous. They all correspond to the range of bytes covered by 856 + * the compressed extent. 857 + */ 858 + int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, 859 + struct bio_vec *bvec, int vcnt, size_t srclen) 860 + { 861 + struct list_head *workspace; 862 + int ret; 863 + 864 + workspace = find_workspace(type); 865 + if (IS_ERR(workspace)) 866 + return -ENOMEM; 867 + 868 + ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, 869 + disk_start, 870 + bvec, vcnt, srclen); 871 + free_workspace(type, workspace); 872 + return ret; 873 + } 874 + 875 + /* 876 + * a less complex decompression routine. Our compressed data fits in a 877 + * single page, and we want to read a single page out of it. 878 + * start_byte tells us the offset into the compressed data we're interested in 879 + */ 880 + int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, 881 + unsigned long start_byte, size_t srclen, size_t destlen) 882 + { 883 + struct list_head *workspace; 884 + int ret; 885 + 886 + workspace = find_workspace(type); 887 + if (IS_ERR(workspace)) 888 + return -ENOMEM; 889 + 890 + ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, 891 + dest_page, start_byte, 892 + srclen, destlen); 893 + 894 + free_workspace(type, workspace); 895 + return ret; 896 + } 897 + 898 + void __exit btrfs_exit_compress(void) 899 + { 900 + free_workspaces(); 901 + } 902 + 903 + /* 904 + * Copy uncompressed data from working buffer to pages. 905 + * 906 + * buf_start is the byte offset we're of the start of our workspace buffer. 
907 + * 908 + * total_out is the last byte of the buffer 909 + */ 910 + int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, 911 + unsigned long total_out, u64 disk_start, 912 + struct bio_vec *bvec, int vcnt, 913 + unsigned long *page_index, 914 + unsigned long *pg_offset) 915 + { 916 + unsigned long buf_offset; 917 + unsigned long current_buf_start; 918 + unsigned long start_byte; 919 + unsigned long working_bytes = total_out - buf_start; 920 + unsigned long bytes; 921 + char *kaddr; 922 + struct page *page_out = bvec[*page_index].bv_page; 923 + 924 + /* 925 + * start byte is the first byte of the page we're currently 926 + * copying into relative to the start of the compressed data. 927 + */ 928 + start_byte = page_offset(page_out) - disk_start; 929 + 930 + /* we haven't yet hit data corresponding to this page */ 931 + if (total_out <= start_byte) 932 + return 1; 933 + 934 + /* 935 + * the start of the data we care about is offset into 936 + * the middle of our working buffer 937 + */ 938 + if (total_out > start_byte && buf_start < start_byte) { 939 + buf_offset = start_byte - buf_start; 940 + working_bytes -= buf_offset; 941 + } else { 942 + buf_offset = 0; 943 + } 944 + current_buf_start = buf_start; 945 + 946 + /* copy bytes from the working buffer into the pages */ 947 + while (working_bytes > 0) { 948 + bytes = min(PAGE_CACHE_SIZE - *pg_offset, 949 + PAGE_CACHE_SIZE - buf_offset); 950 + bytes = min(bytes, working_bytes); 951 + kaddr = kmap_atomic(page_out, KM_USER0); 952 + memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); 953 + kunmap_atomic(kaddr, KM_USER0); 954 + flush_dcache_page(page_out); 955 + 956 + *pg_offset += bytes; 957 + buf_offset += bytes; 958 + working_bytes -= bytes; 959 + current_buf_start += bytes; 960 + 961 + /* check if we need to pick another page */ 962 + if (*pg_offset == PAGE_CACHE_SIZE) { 963 + (*page_index)++; 964 + if (*page_index >= vcnt) 965 + return 0; 966 + 967 + page_out = bvec[*page_index].bv_page; 968 + *pg_offset = 0; 969 + start_byte = page_offset(page_out) - disk_start; 970 + 971 + /* 972 + * make sure our new page is covered by this 973 + * working buffer 974 + */ 975 + if (total_out <= start_byte) 976 + return 1; 977 + 978 + /* 979 + * the next page in the biovec might not be adjacent 980 + * to the last page, but it might still be found 981 + * inside this working buffer. bump our offset pointer 982 + */ 983 + if (total_out > start_byte && 984 + current_buf_start < start_byte) { 985 + buf_offset = start_byte - buf_start; 986 + working_bytes = total_out - start_byte; 987 + current_buf_start = buf_start + buf_offset; 988 + } 989 + } 990 + } 991 + 992 + return 1; 993 }
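The compression.c changes above replace the zlib-only entry points with type-dispatched ones (btrfs_compress_pages, btrfs_decompress_biovec, btrfs_decompress) that borrow a scratch workspace from a small per-type pool bounded roughly by the number of online CPUs. The stand-alone C sketch below models only that find/free pooling pattern with a plain free list; the locking, wait queues and real backends of the kernel code are omitted, and all names here are illustrative.

#include <stdio.h>
#include <stdlib.h>

#define NUM_TYPES 2          /* zlib and lzo in the real code */
#define MAX_IDLE  4          /* stands in for num_online_cpus() */

struct workspace {
	struct workspace *next;
	char buf[4096];      /* scratch space a real backend would size per algorithm */
};

static struct workspace *idle[NUM_TYPES];
static int num_idle[NUM_TYPES];

/* grab an idle workspace for this type, or allocate a fresh one */
static struct workspace *find_workspace(int type)
{
	int idx = type - 1;                /* types start at 1, like in btrfs */
	struct workspace *ws = idle[idx];

	if (ws) {
		idle[idx] = ws->next;
		num_idle[idx]--;
		return ws;
	}
	return calloc(1, sizeof(*ws));
}

/* return a workspace: keep it idle unless we already cache enough of them */
static void free_workspace(int type, struct workspace *ws)
{
	int idx = type - 1;

	if (num_idle[idx] < MAX_IDLE) {
		ws->next = idle[idx];
		idle[idx] = ws;
		num_idle[idx]++;
		return;
	}
	free(ws);
}

int main(void)
{
	struct workspace *a = find_workspace(1);
	struct workspace *b = find_workspace(1);

	free_workspace(1, a);
	free_workspace(1, b);

	/* the next request reuses a cached workspace instead of allocating */
	struct workspace *c = find_workspace(1);
	printf("reused: %s\n", c == b ? "yes" : "no");
	free_workspace(1, c);
	free_workspace(1, a);
	return 0;
}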
+54 -18
fs/btrfs/compression.h
··· 19 #ifndef __BTRFS_COMPRESSION_ 20 #define __BTRFS_COMPRESSION_ 21 22 - int btrfs_zlib_decompress(unsigned char *data_in, 23 - struct page *dest_page, 24 - unsigned long start_byte, 25 - size_t srclen, size_t destlen); 26 - int btrfs_zlib_compress_pages(struct address_space *mapping, 27 - u64 start, unsigned long len, 28 - struct page **pages, 29 - unsigned long nr_dest_pages, 30 - unsigned long *out_pages, 31 - unsigned long *total_in, 32 - unsigned long *total_out, 33 - unsigned long max_out); 34 - int btrfs_zlib_decompress_biovec(struct page **pages_in, 35 - u64 disk_start, 36 - struct bio_vec *bvec, 37 - int vcnt, 38 - size_t srclen); 39 - void btrfs_zlib_exit(void); 40 int btrfs_submit_compressed_write(struct inode *inode, u64 start, 41 unsigned long len, u64 disk_start, 42 unsigned long compressed_len, ··· 47 unsigned long nr_pages); 48 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 49 int mirror_num, unsigned long bio_flags); 50 #endif
··· 19 #ifndef __BTRFS_COMPRESSION_ 20 #define __BTRFS_COMPRESSION_ 21 22 + int btrfs_init_compress(void); 23 + void btrfs_exit_compress(void); 24 + 25 + int btrfs_compress_pages(int type, struct address_space *mapping, 26 + u64 start, unsigned long len, 27 + struct page **pages, 28 + unsigned long nr_dest_pages, 29 + unsigned long *out_pages, 30 + unsigned long *total_in, 31 + unsigned long *total_out, 32 + unsigned long max_out); 33 + int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, 34 + struct bio_vec *bvec, int vcnt, size_t srclen); 35 + int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, 36 + unsigned long start_byte, size_t srclen, size_t destlen); 37 + int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, 38 + unsigned long total_out, u64 disk_start, 39 + struct bio_vec *bvec, int vcnt, 40 + unsigned long *page_index, 41 + unsigned long *pg_offset); 42 + 43 int btrfs_submit_compressed_write(struct inode *inode, u64 start, 44 unsigned long len, u64 disk_start, 45 unsigned long compressed_len, ··· 44 unsigned long nr_pages); 45 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 46 int mirror_num, unsigned long bio_flags); 47 + 48 + struct btrfs_compress_op { 49 + struct list_head *(*alloc_workspace)(void); 50 + 51 + void (*free_workspace)(struct list_head *workspace); 52 + 53 + int (*compress_pages)(struct list_head *workspace, 54 + struct address_space *mapping, 55 + u64 start, unsigned long len, 56 + struct page **pages, 57 + unsigned long nr_dest_pages, 58 + unsigned long *out_pages, 59 + unsigned long *total_in, 60 + unsigned long *total_out, 61 + unsigned long max_out); 62 + 63 + int (*decompress_biovec)(struct list_head *workspace, 64 + struct page **pages_in, 65 + u64 disk_start, 66 + struct bio_vec *bvec, 67 + int vcnt, 68 + size_t srclen); 69 + 70 + int (*decompress)(struct list_head *workspace, 71 + unsigned char *data_in, 72 + struct page *dest_page, 73 + unsigned long start_byte, 74 + size_t srclen, size_t destlen); 75 + }; 76 + 77 + extern struct btrfs_compress_op btrfs_zlib_compress; 78 + extern struct btrfs_compress_op btrfs_lzo_compress; 79 + 80 #endif
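compression.h now describes each backend through a struct btrfs_compress_op of function pointers, and compression.c picks the backend by indexing an ops array with type - 1 (zlib is type 1, lzo is type 2). Below is a minimal stand-alone model of that dispatch shape; the dummy backends only memcpy and merely stand in for the real zlib/lzo implementations.

#include <stdio.h>
#include <string.h>

enum { COMPRESS_NONE = 0, COMPRESS_ZLIB = 1, COMPRESS_LZO = 2, COMPRESS_TYPES = 2 };

/* trimmed-down analogue of struct btrfs_compress_op: one hook per operation */
struct compress_op {
	const char *name;
	int (*compress)(const char *in, size_t in_len, char *out, size_t *out_len);
};

/* dummy backends: just copy; a real backend would call into zlib or lzo here */
static int fake_zlib(const char *in, size_t in_len, char *out, size_t *out_len)
{
	memcpy(out, in, in_len);
	*out_len = in_len;
	return 0;
}

static int fake_lzo(const char *in, size_t in_len, char *out, size_t *out_len)
{
	memcpy(out, in, in_len);
	*out_len = in_len;
	return 0;
}

static const struct compress_op zlib_op = { "zlib", fake_zlib };
static const struct compress_op lzo_op  = { "lzo",  fake_lzo };

/* table indexed by type - 1, like btrfs_compress_op[] in compression.c */
static const struct compress_op *ops[COMPRESS_TYPES] = { &zlib_op, &lzo_op };

static int compress_pages(int type, const char *in, size_t in_len,
			  char *out, size_t *out_len)
{
	return ops[type - 1]->compress(in, in_len, out, out_len);
}

int main(void)
{
	char out[64];
	size_t out_len;

	compress_pages(COMPRESS_LZO, "hello", 5, out, &out_len);
	printf("%s wrote %zu bytes\n", ops[COMPRESS_LZO - 1]->name, out_len);
	return 0;
}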
+8
fs/btrfs/ctree.c
···
 /* this also releases the path */
 void btrfs_free_path(struct btrfs_path *p)
 {
+	if (!p)
+		return;
 	btrfs_release_path(NULL, p);
 	kmem_cache_free(btrfs_path_cachep, p);
 }
···
 	btrfs_assert_tree_locked(path->nodes[1]);
 
 	right = read_node_slot(root, upper, slot + 1);
+	if (right == NULL)
+		return 1;
+
 	btrfs_tree_lock(right);
 	btrfs_set_lock_blocking(right);
···
 	btrfs_assert_tree_locked(path->nodes[1]);
 
 	left = read_node_slot(root, path->nodes[1], slot - 1);
+	if (left == NULL)
+		return 1;
+
 	btrfs_tree_lock(left);
 	btrfs_set_lock_blocking(left);
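The btrfs_free_path() hunk makes the helper tolerate a NULL path, following the kfree() convention so error paths can free unconditionally. A tiny illustration of why that simplifies callers (all names invented):

#include <stdlib.h>

struct path { int slots[8]; };

/* like btrfs_free_path() after the fix: safe to call with NULL */
static void free_path(struct path *p)
{
	if (!p)
		return;
	free(p);
}

static int do_work(int fail_early)
{
	struct path *p = NULL;

	if (fail_early)
		goto out;          /* p is still NULL here */

	p = calloc(1, sizeof(*p));
	/* ... use p ... */
out:
	free_path(p);              /* no NULL check needed at every exit point */
	return fail_early ? -1 : 0;
}

int main(void)
{
	do_work(1);
	do_work(0);
	return 0;
}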
+43 -5
fs/btrfs/ctree.h
··· 295 #define BTRFS_FSID_SIZE 16 296 #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) 297 #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) 298 #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) 299 #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) 300 ··· 407 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 408 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 409 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 410 411 #define BTRFS_FEATURE_COMPAT_SUPP 0ULL 412 #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 413 #define BTRFS_FEATURE_INCOMPAT_SUPP \ 414 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 415 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 416 - BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 417 418 /* 419 * A leaf is full of items. offset and size tell us where to find ··· 562 } __attribute__ ((__packed__)); 563 564 enum btrfs_compression_type { 565 - BTRFS_COMPRESS_NONE = 0, 566 - BTRFS_COMPRESS_ZLIB = 1, 567 - BTRFS_COMPRESS_LAST = 2, 568 }; 569 570 struct btrfs_inode_item { ··· 609 __le16 name_len; 610 u8 type; 611 } __attribute__ ((__packed__)); 612 613 struct btrfs_root_item { 614 struct btrfs_inode_item inode; ··· 910 */ 911 u64 last_trans_log_full_commit; 912 u64 open_ioctl_trans; 913 - unsigned long mount_opt; 914 u64 max_inline; 915 u64 alloc_start; 916 struct btrfs_transaction *running_transaction; ··· 1066 unsigned metadata_ratio; 1067 1068 void *bdev_holder; 1069 }; 1070 1071 /* ··· 1912 BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, 1913 last_snapshot, 64); 1914 1915 /* struct btrfs_super_block */ 1916 1917 BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); ··· 2169 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 2170 struct btrfs_root *root, u64 group_start); 2171 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 2172 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 2173 void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 2174 int btrfs_check_data_free_space(struct inode *inode, u64 bytes); ··· 2213 int btrfs_set_block_group_rw(struct btrfs_root *root, 2214 struct btrfs_block_group_cache *cache); 2215 void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2216 /* ctree.c */ 2217 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2218 int level, int *slot); ··· 2572 /* super.c */ 2573 int btrfs_parse_options(struct btrfs_root *root, char *options); 2574 int btrfs_sync_fs(struct super_block *sb, int wait); 2575 2576 /* acl.c */ 2577 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
··· 295 #define BTRFS_FSID_SIZE 16 296 #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) 297 #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) 298 + 299 + /* 300 + * File system states 301 + */ 302 + 303 + /* Errors detected */ 304 + #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) 305 + 306 #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) 307 #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) 308 ··· 399 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 400 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 401 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 402 + #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 403 404 #define BTRFS_FEATURE_COMPAT_SUPP 0ULL 405 #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 406 #define BTRFS_FEATURE_INCOMPAT_SUPP \ 407 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 408 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 409 + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 410 + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) 411 412 /* 413 * A leaf is full of items. offset and size tell us where to find ··· 552 } __attribute__ ((__packed__)); 553 554 enum btrfs_compression_type { 555 + BTRFS_COMPRESS_NONE = 0, 556 + BTRFS_COMPRESS_ZLIB = 1, 557 + BTRFS_COMPRESS_LZO = 2, 558 + BTRFS_COMPRESS_TYPES = 2, 559 + BTRFS_COMPRESS_LAST = 3, 560 }; 561 562 struct btrfs_inode_item { ··· 597 __le16 name_len; 598 u8 type; 599 } __attribute__ ((__packed__)); 600 + 601 + #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) 602 603 struct btrfs_root_item { 604 struct btrfs_inode_item inode; ··· 896 */ 897 u64 last_trans_log_full_commit; 898 u64 open_ioctl_trans; 899 + unsigned long mount_opt:20; 900 + unsigned long compress_type:4; 901 u64 max_inline; 902 u64 alloc_start; 903 struct btrfs_transaction *running_transaction; ··· 1051 unsigned metadata_ratio; 1052 1053 void *bdev_holder; 1054 + 1055 + /* filesystem state */ 1056 + u64 fs_state; 1057 }; 1058 1059 /* ··· 1894 BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, 1895 last_snapshot, 64); 1896 1897 + static inline bool btrfs_root_readonly(struct btrfs_root *root) 1898 + { 1899 + return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; 1900 + } 1901 + 1902 /* struct btrfs_super_block */ 1903 1904 BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); ··· 2146 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 2147 struct btrfs_root *root, u64 group_start); 2148 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 2149 + u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); 2150 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 2151 void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 2152 int btrfs_check_data_free_space(struct inode *inode, u64 bytes); ··· 2189 int btrfs_set_block_group_rw(struct btrfs_root *root, 2190 struct btrfs_block_group_cache *cache); 2191 void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2192 + u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 2193 + int btrfs_error_unpin_extent_range(struct btrfs_root *root, 2194 + u64 start, u64 end); 2195 + int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2196 + u64 num_bytes); 2197 + 2198 /* ctree.c */ 2199 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2200 int level, int *slot); ··· 2542 /* super.c */ 2543 int btrfs_parse_options(struct btrfs_root *root, char *options); 2544 int btrfs_sync_fs(struct super_block *sb, int wait); 2545 + void __btrfs_std_error(struct btrfs_fs_info *fs_info, const 
char *function, 2546 + unsigned int line, int errno); 2547 + 2548 + #define btrfs_std_error(fs_info, errno) \ 2549 + do { \ 2550 + if ((errno)) \ 2551 + __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ 2552 + } while (0) 2553 2554 /* acl.c */ 2555 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
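ctree.h gains btrfs_std_error(), a macro that captures the failing function and line and hands a non-zero errno to __btrfs_std_error(), which is what ultimately forces the filesystem read-only instead of panicking. The stand-alone sketch below reproduces the macro pattern with a simplified state flag and handler; it is a model of the idea, not the kernel implementation.

#include <stdio.h>

#define SUPER_FLAG_ERROR (1ULL << 2)   /* mirrors BTRFS_SUPER_FLAG_ERROR */

static unsigned long long fs_state;

/* out-of-line handler: record where the error happened and force read-only */
static void report_error(const char *function, unsigned int line, int err)
{
	fs_state |= SUPER_FLAG_ERROR;
	fprintf(stderr, "forcing read-only: error %d in %s:%u\n",
		err, function, line);
}

/* call sites just pass the errno; the macro captures __func__/__LINE__ */
#define std_error(err)						\
	do {							\
		if ((err))					\
			report_error(__func__, __LINE__, (err));\
	} while (0)

static int write_something(int simulate_failure)
{
	int ret = simulate_failure ? -5 /* -EIO */ : 0;

	std_error(ret);
	return ret;
}

int main(void)
{
	write_something(1);
	if (fs_state & SUPER_FLAG_ERROR)
		printf("filesystem is now flagged read-only\n");
	return 0;
}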
+405 -7
fs/btrfs/disk-io.c
··· 44 static struct extent_io_ops btree_extent_io_ops; 45 static void end_workqueue_fn(struct btrfs_work *work); 46 static void free_fs_root(struct btrfs_root *root); 47 48 /* 49 * end_io_wq structs are used to do processing in task context when an IO is ··· 367 WARN_ON(len == 0); 368 369 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 370 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, 371 btrfs_header_generation(eb)); 372 BUG_ON(ret); ··· 445 WARN_ON(len == 0); 446 447 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 448 449 found_start = btrfs_header_bytenr(eb); 450 if (found_start != start) { ··· 1167 } 1168 btrfs_free_path(path); 1169 if (ret) { 1170 if (ret > 0) 1171 ret = -ENOENT; 1172 return ERR_PTR(ret); ··· 1736 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1737 1738 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1739 - if (!bh) 1740 goto fail_iput; 1741 1742 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1743 memcpy(&fs_info->super_for_commit, &fs_info->super_copy, ··· 1751 disk_super = &fs_info->super_copy; 1752 if (!btrfs_super_root(disk_super)) 1753 goto fail_iput; 1754 1755 ret = btrfs_parse_options(tree_root, options); 1756 if (ret) { ··· 1774 } 1775 1776 features = btrfs_super_incompat_flags(disk_super); 1777 - if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { 1778 - features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 1779 - btrfs_set_super_incompat_flags(disk_super, features); 1780 - } 1781 1782 features = btrfs_super_compat_ro_flags(disk_super) & 1783 ~BTRFS_FEATURE_COMPAT_RO_SUPP; ··· 1987 btrfs_set_opt(fs_info->mount_opt, SSD); 1988 } 1989 1990 - if (btrfs_super_log_root(disk_super) != 0) { 1991 u64 bytenr = btrfs_super_log_root(disk_super); 1992 1993 if (fs_devices->rw_devices == 0) { ··· 2474 smp_mb(); 2475 2476 btrfs_put_block_group_cache(fs_info); 2477 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2478 - ret = btrfs_commit_super(root); 2479 if (ret) 2480 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2481 } ··· 2668 free_extent_buffer(eb); 2669 out: 2670 lock_page(page); 2671 return 0; 2672 } 2673
··· 44 static struct extent_io_ops btree_extent_io_ops; 45 static void end_workqueue_fn(struct btrfs_work *work); 46 static void free_fs_root(struct btrfs_root *root); 47 + static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 48 + int read_only); 49 + static int btrfs_destroy_ordered_operations(struct btrfs_root *root); 50 + static int btrfs_destroy_ordered_extents(struct btrfs_root *root); 51 + static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 52 + struct btrfs_root *root); 53 + static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); 54 + static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); 55 + static int btrfs_destroy_marked_extents(struct btrfs_root *root, 56 + struct extent_io_tree *dirty_pages, 57 + int mark); 58 + static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 59 + struct extent_io_tree *pinned_extents); 60 + static int btrfs_cleanup_transaction(struct btrfs_root *root); 61 62 /* 63 * end_io_wq structs are used to do processing in task context when an IO is ··· 353 WARN_ON(len == 0); 354 355 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 356 + if (eb == NULL) { 357 + WARN_ON(1); 358 + goto out; 359 + } 360 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, 361 btrfs_header_generation(eb)); 362 BUG_ON(ret); ··· 427 WARN_ON(len == 0); 428 429 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 430 + if (eb == NULL) { 431 + ret = -EIO; 432 + goto out; 433 + } 434 435 found_start = btrfs_header_bytenr(eb); 436 if (found_start != start) { ··· 1145 } 1146 btrfs_free_path(path); 1147 if (ret) { 1148 + kfree(root); 1149 if (ret > 0) 1150 ret = -ENOENT; 1151 return ERR_PTR(ret); ··· 1713 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1714 1715 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1716 + if (!bh) { 1717 + err = -EINVAL; 1718 goto fail_iput; 1719 + } 1720 1721 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1722 memcpy(&fs_info->super_for_commit, &fs_info->super_copy, ··· 1726 disk_super = &fs_info->super_copy; 1727 if (!btrfs_super_root(disk_super)) 1728 goto fail_iput; 1729 + 1730 + /* check FS state, whether FS is broken. */ 1731 + fs_info->fs_state |= btrfs_super_flags(disk_super); 1732 + 1733 + btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 1734 1735 ret = btrfs_parse_options(tree_root, options); 1736 if (ret) { ··· 1744 } 1745 1746 features = btrfs_super_incompat_flags(disk_super); 1747 + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 1748 + if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 1749 + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 1750 + btrfs_set_super_incompat_flags(disk_super, features); 1751 1752 features = btrfs_super_compat_ro_flags(disk_super) & 1753 ~BTRFS_FEATURE_COMPAT_RO_SUPP; ··· 1957 btrfs_set_opt(fs_info->mount_opt, SSD); 1958 } 1959 1960 + /* do not make disk changes in broken FS */ 1961 + if (btrfs_super_log_root(disk_super) != 0 && 1962 + !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { 1963 u64 bytenr = btrfs_super_log_root(disk_super); 1964 1965 if (fs_devices->rw_devices == 0) { ··· 2442 smp_mb(); 2443 2444 btrfs_put_block_group_cache(fs_info); 2445 + 2446 + /* 2447 + * Here come 2 situations when btrfs is broken to flip readonly: 2448 + * 2449 + * 1. when btrfs flips readonly somewhere else before 2450 + * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, 2451 + * and btrfs will skip to write sb directly to keep 2452 + * ERROR state on disk. 2453 + * 2454 + * 2. 
when btrfs flips readonly just in btrfs_commit_super, 2455 + * and in such case, btrfs cannnot write sb via btrfs_commit_super, 2456 + * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, 2457 + * btrfs will cleanup all FS resources first and write sb then. 2458 + */ 2459 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2460 + ret = btrfs_commit_super(root); 2461 + if (ret) 2462 + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2463 + } 2464 + 2465 + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 2466 + ret = btrfs_error_commit_super(root); 2467 if (ret) 2468 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2469 } ··· 2616 free_extent_buffer(eb); 2617 out: 2618 lock_page(page); 2619 + return 0; 2620 + } 2621 + 2622 + static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 2623 + int read_only) 2624 + { 2625 + if (read_only) 2626 + return; 2627 + 2628 + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 2629 + printk(KERN_WARNING "warning: mount fs with errors, " 2630 + "running btrfsck is recommended\n"); 2631 + } 2632 + 2633 + int btrfs_error_commit_super(struct btrfs_root *root) 2634 + { 2635 + int ret; 2636 + 2637 + mutex_lock(&root->fs_info->cleaner_mutex); 2638 + btrfs_run_delayed_iputs(root); 2639 + mutex_unlock(&root->fs_info->cleaner_mutex); 2640 + 2641 + down_write(&root->fs_info->cleanup_work_sem); 2642 + up_write(&root->fs_info->cleanup_work_sem); 2643 + 2644 + /* cleanup FS via transaction */ 2645 + btrfs_cleanup_transaction(root); 2646 + 2647 + ret = write_ctree_super(NULL, root, 0); 2648 + 2649 + return ret; 2650 + } 2651 + 2652 + static int btrfs_destroy_ordered_operations(struct btrfs_root *root) 2653 + { 2654 + struct btrfs_inode *btrfs_inode; 2655 + struct list_head splice; 2656 + 2657 + INIT_LIST_HEAD(&splice); 2658 + 2659 + mutex_lock(&root->fs_info->ordered_operations_mutex); 2660 + spin_lock(&root->fs_info->ordered_extent_lock); 2661 + 2662 + list_splice_init(&root->fs_info->ordered_operations, &splice); 2663 + while (!list_empty(&splice)) { 2664 + btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2665 + ordered_operations); 2666 + 2667 + list_del_init(&btrfs_inode->ordered_operations); 2668 + 2669 + btrfs_invalidate_inodes(btrfs_inode->root); 2670 + } 2671 + 2672 + spin_unlock(&root->fs_info->ordered_extent_lock); 2673 + mutex_unlock(&root->fs_info->ordered_operations_mutex); 2674 + 2675 + return 0; 2676 + } 2677 + 2678 + static int btrfs_destroy_ordered_extents(struct btrfs_root *root) 2679 + { 2680 + struct list_head splice; 2681 + struct btrfs_ordered_extent *ordered; 2682 + struct inode *inode; 2683 + 2684 + INIT_LIST_HEAD(&splice); 2685 + 2686 + spin_lock(&root->fs_info->ordered_extent_lock); 2687 + 2688 + list_splice_init(&root->fs_info->ordered_extents, &splice); 2689 + while (!list_empty(&splice)) { 2690 + ordered = list_entry(splice.next, struct btrfs_ordered_extent, 2691 + root_extent_list); 2692 + 2693 + list_del_init(&ordered->root_extent_list); 2694 + atomic_inc(&ordered->refs); 2695 + 2696 + /* the inode may be getting freed (in sys_unlink path). 
*/ 2697 + inode = igrab(ordered->inode); 2698 + 2699 + spin_unlock(&root->fs_info->ordered_extent_lock); 2700 + if (inode) 2701 + iput(inode); 2702 + 2703 + atomic_set(&ordered->refs, 1); 2704 + btrfs_put_ordered_extent(ordered); 2705 + 2706 + spin_lock(&root->fs_info->ordered_extent_lock); 2707 + } 2708 + 2709 + spin_unlock(&root->fs_info->ordered_extent_lock); 2710 + 2711 + return 0; 2712 + } 2713 + 2714 + static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 2715 + struct btrfs_root *root) 2716 + { 2717 + struct rb_node *node; 2718 + struct btrfs_delayed_ref_root *delayed_refs; 2719 + struct btrfs_delayed_ref_node *ref; 2720 + int ret = 0; 2721 + 2722 + delayed_refs = &trans->delayed_refs; 2723 + 2724 + spin_lock(&delayed_refs->lock); 2725 + if (delayed_refs->num_entries == 0) { 2726 + printk(KERN_INFO "delayed_refs has NO entry\n"); 2727 + return ret; 2728 + } 2729 + 2730 + node = rb_first(&delayed_refs->root); 2731 + while (node) { 2732 + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2733 + node = rb_next(node); 2734 + 2735 + ref->in_tree = 0; 2736 + rb_erase(&ref->rb_node, &delayed_refs->root); 2737 + delayed_refs->num_entries--; 2738 + 2739 + atomic_set(&ref->refs, 1); 2740 + if (btrfs_delayed_ref_is_head(ref)) { 2741 + struct btrfs_delayed_ref_head *head; 2742 + 2743 + head = btrfs_delayed_node_to_head(ref); 2744 + mutex_lock(&head->mutex); 2745 + kfree(head->extent_op); 2746 + delayed_refs->num_heads--; 2747 + if (list_empty(&head->cluster)) 2748 + delayed_refs->num_heads_ready--; 2749 + list_del_init(&head->cluster); 2750 + mutex_unlock(&head->mutex); 2751 + } 2752 + 2753 + spin_unlock(&delayed_refs->lock); 2754 + btrfs_put_delayed_ref(ref); 2755 + 2756 + cond_resched(); 2757 + spin_lock(&delayed_refs->lock); 2758 + } 2759 + 2760 + spin_unlock(&delayed_refs->lock); 2761 + 2762 + return ret; 2763 + } 2764 + 2765 + static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) 2766 + { 2767 + struct btrfs_pending_snapshot *snapshot; 2768 + struct list_head splice; 2769 + 2770 + INIT_LIST_HEAD(&splice); 2771 + 2772 + list_splice_init(&t->pending_snapshots, &splice); 2773 + 2774 + while (!list_empty(&splice)) { 2775 + snapshot = list_entry(splice.next, 2776 + struct btrfs_pending_snapshot, 2777 + list); 2778 + 2779 + list_del_init(&snapshot->list); 2780 + 2781 + kfree(snapshot); 2782 + } 2783 + 2784 + return 0; 2785 + } 2786 + 2787 + static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) 2788 + { 2789 + struct btrfs_inode *btrfs_inode; 2790 + struct list_head splice; 2791 + 2792 + INIT_LIST_HEAD(&splice); 2793 + 2794 + list_splice_init(&root->fs_info->delalloc_inodes, &splice); 2795 + 2796 + spin_lock(&root->fs_info->delalloc_lock); 2797 + 2798 + while (!list_empty(&splice)) { 2799 + btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2800 + delalloc_inodes); 2801 + 2802 + list_del_init(&btrfs_inode->delalloc_inodes); 2803 + 2804 + btrfs_invalidate_inodes(btrfs_inode->root); 2805 + } 2806 + 2807 + spin_unlock(&root->fs_info->delalloc_lock); 2808 + 2809 + return 0; 2810 + } 2811 + 2812 + static int btrfs_destroy_marked_extents(struct btrfs_root *root, 2813 + struct extent_io_tree *dirty_pages, 2814 + int mark) 2815 + { 2816 + int ret; 2817 + struct page *page; 2818 + struct inode *btree_inode = root->fs_info->btree_inode; 2819 + struct extent_buffer *eb; 2820 + u64 start = 0; 2821 + u64 end; 2822 + u64 offset; 2823 + unsigned long index; 2824 + 2825 + while (1) { 2826 + ret = find_first_extent_bit(dirty_pages, start, &start, 
&end, 2827 + mark); 2828 + if (ret) 2829 + break; 2830 + 2831 + clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 2832 + while (start <= end) { 2833 + index = start >> PAGE_CACHE_SHIFT; 2834 + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 2835 + page = find_get_page(btree_inode->i_mapping, index); 2836 + if (!page) 2837 + continue; 2838 + offset = page_offset(page); 2839 + 2840 + spin_lock(&dirty_pages->buffer_lock); 2841 + eb = radix_tree_lookup( 2842 + &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, 2843 + offset >> PAGE_CACHE_SHIFT); 2844 + spin_unlock(&dirty_pages->buffer_lock); 2845 + if (eb) { 2846 + ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, 2847 + &eb->bflags); 2848 + atomic_set(&eb->refs, 1); 2849 + } 2850 + if (PageWriteback(page)) 2851 + end_page_writeback(page); 2852 + 2853 + lock_page(page); 2854 + if (PageDirty(page)) { 2855 + clear_page_dirty_for_io(page); 2856 + spin_lock_irq(&page->mapping->tree_lock); 2857 + radix_tree_tag_clear(&page->mapping->page_tree, 2858 + page_index(page), 2859 + PAGECACHE_TAG_DIRTY); 2860 + spin_unlock_irq(&page->mapping->tree_lock); 2861 + } 2862 + 2863 + page->mapping->a_ops->invalidatepage(page, 0); 2864 + unlock_page(page); 2865 + } 2866 + } 2867 + 2868 + return ret; 2869 + } 2870 + 2871 + static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 2872 + struct extent_io_tree *pinned_extents) 2873 + { 2874 + struct extent_io_tree *unpin; 2875 + u64 start; 2876 + u64 end; 2877 + int ret; 2878 + 2879 + unpin = pinned_extents; 2880 + while (1) { 2881 + ret = find_first_extent_bit(unpin, 0, &start, &end, 2882 + EXTENT_DIRTY); 2883 + if (ret) 2884 + break; 2885 + 2886 + /* opt_discard */ 2887 + ret = btrfs_error_discard_extent(root, start, end + 1 - start); 2888 + 2889 + clear_extent_dirty(unpin, start, end, GFP_NOFS); 2890 + btrfs_error_unpin_extent_range(root, start, end); 2891 + cond_resched(); 2892 + } 2893 + 2894 + return 0; 2895 + } 2896 + 2897 + static int btrfs_cleanup_transaction(struct btrfs_root *root) 2898 + { 2899 + struct btrfs_transaction *t; 2900 + LIST_HEAD(list); 2901 + 2902 + WARN_ON(1); 2903 + 2904 + mutex_lock(&root->fs_info->trans_mutex); 2905 + mutex_lock(&root->fs_info->transaction_kthread_mutex); 2906 + 2907 + list_splice_init(&root->fs_info->trans_list, &list); 2908 + while (!list_empty(&list)) { 2909 + t = list_entry(list.next, struct btrfs_transaction, list); 2910 + if (!t) 2911 + break; 2912 + 2913 + btrfs_destroy_ordered_operations(root); 2914 + 2915 + btrfs_destroy_ordered_extents(root); 2916 + 2917 + btrfs_destroy_delayed_refs(t, root); 2918 + 2919 + btrfs_block_rsv_release(root, 2920 + &root->fs_info->trans_block_rsv, 2921 + t->dirty_pages.dirty_bytes); 2922 + 2923 + /* FIXME: cleanup wait for commit */ 2924 + t->in_commit = 1; 2925 + t->blocked = 1; 2926 + if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 2927 + wake_up(&root->fs_info->transaction_blocked_wait); 2928 + 2929 + t->blocked = 0; 2930 + if (waitqueue_active(&root->fs_info->transaction_wait)) 2931 + wake_up(&root->fs_info->transaction_wait); 2932 + mutex_unlock(&root->fs_info->trans_mutex); 2933 + 2934 + mutex_lock(&root->fs_info->trans_mutex); 2935 + t->commit_done = 1; 2936 + if (waitqueue_active(&t->commit_wait)) 2937 + wake_up(&t->commit_wait); 2938 + mutex_unlock(&root->fs_info->trans_mutex); 2939 + 2940 + mutex_lock(&root->fs_info->trans_mutex); 2941 + 2942 + btrfs_destroy_pending_snapshots(t); 2943 + 2944 + btrfs_destroy_delalloc_inodes(root); 2945 + 2946 + spin_lock(&root->fs_info->new_trans_lock); 2947 + 
root->fs_info->running_transaction = NULL; 2948 + spin_unlock(&root->fs_info->new_trans_lock); 2949 + 2950 + btrfs_destroy_marked_extents(root, &t->dirty_pages, 2951 + EXTENT_DIRTY); 2952 + 2953 + btrfs_destroy_pinned_extent(root, 2954 + root->fs_info->pinned_extents); 2955 + 2956 + t->use_count = 0; 2957 + list_del_init(&t->list); 2958 + memset(t, 0, sizeof(*t)); 2959 + kmem_cache_free(btrfs_transaction_cachep, t); 2960 + } 2961 + 2962 + mutex_unlock(&root->fs_info->transaction_kthread_mutex); 2963 + mutex_unlock(&root->fs_info->trans_mutex); 2964 + 2965 return 0; 2966 } 2967
+1
fs/btrfs/disk-io.h
···
 				struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
+int btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+79 -11
fs/btrfs/extent-tree.c
··· 3089 return btrfs_reduce_alloc_profile(root, flags); 3090 } 3091 3092 - static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3093 { 3094 u64 flags; 3095 ··· 3161 bytes + 2 * 1024 * 1024, 3162 alloc_target, 0); 3163 btrfs_end_transaction(trans, root); 3164 - if (ret < 0) 3165 - return ret; 3166 3167 if (!data_sinfo) { 3168 btrfs_set_inode_space_info(root, inode); ··· 3177 spin_unlock(&data_sinfo->lock); 3178 3179 /* commit the current transaction and try again */ 3180 if (!committed && !root->fs_info->open_ioctl_trans) { 3181 committed = 1; 3182 trans = btrfs_join_transaction(root, 1); ··· 3725 ret = btrfs_commit_transaction(trans, root); 3726 return 0; 3727 } 3728 - 3729 - WARN_ON(1); 3730 - printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", 3731 - block_rsv->size, block_rsv->reserved, 3732 - block_rsv->freed[0], block_rsv->freed[1]); 3733 3734 return -ENOSPC; 3735 } ··· 7970 7971 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 7972 sinfo->bytes_may_use + sinfo->bytes_readonly + 7973 - cache->reserved_pinned + num_bytes < sinfo->total_bytes) { 7974 sinfo->bytes_readonly += num_bytes; 7975 sinfo->bytes_reserved += cache->reserved_pinned; 7976 cache->reserved_pinned = 0; 7977 cache->ro = 1; 7978 ret = 0; 7979 } 7980 spin_unlock(&cache->lock); 7981 spin_unlock(&sinfo->lock); 7982 return ret; ··· 8011 out: 8012 btrfs_end_transaction(trans, root); 8013 return ret; 8014 } 8015 8016 int btrfs_set_block_group_rw(struct btrfs_root *root, ··· 8149 mutex_lock(&root->fs_info->chunk_mutex); 8150 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 8151 u64 min_free = btrfs_block_group_used(&block_group->item); 8152 - u64 dev_offset, max_avail; 8153 8154 /* 8155 * check to make sure we can actually find a chunk with enough ··· 8157 */ 8158 if (device->total_bytes > device->bytes_used + min_free) { 8159 ret = find_free_dev_extent(NULL, device, min_free, 8160 - &dev_offset, &max_avail); 8161 if (!ret) 8162 break; 8163 ret = -1; ··· 8640 out: 8641 btrfs_free_path(path); 8642 return ret; 8643 }
··· 3089 return btrfs_reduce_alloc_profile(root, flags); 3090 } 3091 3092 + u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3093 { 3094 u64 flags; 3095 ··· 3161 bytes + 2 * 1024 * 1024, 3162 alloc_target, 0); 3163 btrfs_end_transaction(trans, root); 3164 + if (ret < 0) { 3165 + if (ret != -ENOSPC) 3166 + return ret; 3167 + else 3168 + goto commit_trans; 3169 + } 3170 3171 if (!data_sinfo) { 3172 btrfs_set_inode_space_info(root, inode); ··· 3173 spin_unlock(&data_sinfo->lock); 3174 3175 /* commit the current transaction and try again */ 3176 + commit_trans: 3177 if (!committed && !root->fs_info->open_ioctl_trans) { 3178 committed = 1; 3179 trans = btrfs_join_transaction(root, 1); ··· 3720 ret = btrfs_commit_transaction(trans, root); 3721 return 0; 3722 } 3723 3724 return -ENOSPC; 3725 } ··· 7970 7971 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 7972 sinfo->bytes_may_use + sinfo->bytes_readonly + 7973 + cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { 7974 sinfo->bytes_readonly += num_bytes; 7975 sinfo->bytes_reserved += cache->reserved_pinned; 7976 cache->reserved_pinned = 0; 7977 cache->ro = 1; 7978 ret = 0; 7979 } 7980 + 7981 spin_unlock(&cache->lock); 7982 spin_unlock(&sinfo->lock); 7983 return ret; ··· 8010 out: 8011 btrfs_end_transaction(trans, root); 8012 return ret; 8013 + } 8014 + 8015 + /* 8016 + * helper to account the unused space of all the readonly block group in the 8017 + * list. takes mirrors into account. 8018 + */ 8019 + static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) 8020 + { 8021 + struct btrfs_block_group_cache *block_group; 8022 + u64 free_bytes = 0; 8023 + int factor; 8024 + 8025 + list_for_each_entry(block_group, groups_list, list) { 8026 + spin_lock(&block_group->lock); 8027 + 8028 + if (!block_group->ro) { 8029 + spin_unlock(&block_group->lock); 8030 + continue; 8031 + } 8032 + 8033 + if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | 8034 + BTRFS_BLOCK_GROUP_RAID10 | 8035 + BTRFS_BLOCK_GROUP_DUP)) 8036 + factor = 2; 8037 + else 8038 + factor = 1; 8039 + 8040 + free_bytes += (block_group->key.offset - 8041 + btrfs_block_group_used(&block_group->item)) * 8042 + factor; 8043 + 8044 + spin_unlock(&block_group->lock); 8045 + } 8046 + 8047 + return free_bytes; 8048 + } 8049 + 8050 + /* 8051 + * helper to account the unused space of all the readonly block group in the 8052 + * space_info. takes mirrors into account. 
8053 + */ 8054 + u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) 8055 + { 8056 + int i; 8057 + u64 free_bytes = 0; 8058 + 8059 + spin_lock(&sinfo->lock); 8060 + 8061 + for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) 8062 + if (!list_empty(&sinfo->block_groups[i])) 8063 + free_bytes += __btrfs_get_ro_block_group_free_space( 8064 + &sinfo->block_groups[i]); 8065 + 8066 + spin_unlock(&sinfo->lock); 8067 + 8068 + return free_bytes; 8069 } 8070 8071 int btrfs_set_block_group_rw(struct btrfs_root *root, ··· 8092 mutex_lock(&root->fs_info->chunk_mutex); 8093 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 8094 u64 min_free = btrfs_block_group_used(&block_group->item); 8095 + u64 dev_offset; 8096 8097 /* 8098 * check to make sure we can actually find a chunk with enough ··· 8100 */ 8101 if (device->total_bytes > device->bytes_used + min_free) { 8102 ret = find_free_dev_extent(NULL, device, min_free, 8103 + &dev_offset, NULL); 8104 if (!ret) 8105 break; 8106 ret = -1; ··· 8583 out: 8584 btrfs_free_path(path); 8585 return ret; 8586 + } 8587 + 8588 + int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 8589 + { 8590 + return unpin_extent_range(root, start, end); 8591 + } 8592 + 8593 + int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 8594 + u64 num_bytes) 8595 + { 8596 + return btrfs_discard_extent(root, bytenr, num_bytes); 8597 }
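btrfs_account_ro_block_groups_free_space() above sums the unused bytes of read-only block groups and doubles the contribution of RAID1/RAID10/DUP groups, since those profiles keep two copies of every byte. A small stand-alone version of that arithmetic, with invented sample numbers:

#include <stdio.h>
#include <stdint.h>

#define BG_RAID1  (1 << 0)
#define BG_RAID10 (1 << 1)
#define BG_DUP    (1 << 2)

struct block_group {
	uint64_t size;     /* key.offset in the real structure */
	uint64_t used;     /* btrfs_block_group_used() */
	unsigned flags;
	int ro;
};

/* unused space across read-only groups, counting both mirror copies */
static uint64_t ro_free_space(const struct block_group *groups, int n)
{
	uint64_t free_bytes = 0;

	for (int i = 0; i < n; i++) {
		int factor;

		if (!groups[i].ro)
			continue;

		factor = (groups[i].flags & (BG_RAID1 | BG_RAID10 | BG_DUP)) ? 2 : 1;
		free_bytes += (groups[i].size - groups[i].used) * factor;
	}
	return free_bytes;
}

int main(void)
{
	/* one single-copy group and one RAID1 group, both read-only */
	struct block_group groups[] = {
		{ .size = 1024, .used = 600, .flags = 0,        .ro = 1 },
		{ .size = 1024, .used = 600, .flags = BG_RAID1, .ro = 1 },
	};

	/* 424 + 424 * 2 = 1272 bytes reported as free */
	printf("%llu\n", (unsigned long long)ro_free_space(groups, 2));
	return 0;
}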
+6 -1
fs/btrfs/extent_io.c
···
 	BUG_ON(extent_map_end(em) <= cur);
 	BUG_ON(end < cur);
 
-	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
 		this_bio_flag = EXTENT_BIO_COMPRESSED;
+		extent_set_compress_type(&this_bio_flag,
+					 em->compress_type);
+	}
 
 	iosize = min(extent_map_end(em) - cur, end - cur + 1);
 	cur_end = min(extent_map_end(em) - 1, end);
···
 #endif
 
 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	if (eb == NULL)
+		return NULL;
 	eb->start = start;
 	eb->len = len;
 	spin_lock_init(&eb->lock);
+16 -1
fs/btrfs/extent_io.h
···
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
-/* flags for bio submission */
+/*
+ * flags for bio submission. The high bits indicate the compression
+ * type for this bio
+ */
 #define EXTENT_BIO_COMPRESSED 1
+#define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_UPTODATE 0
···
 	 */
 	wait_queue_head_t lock_wq;
 };
+
+static inline void extent_set_compress_type(unsigned long *bio_flags,
+					    int compress_type)
+{
+	*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
+}
+
+static inline int extent_compress_type(unsigned long bio_flags)
+{
+	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
+}
 
 struct extent_map_tree;
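With this extent_io.h change, the low bits of the per-bio flags word keep EXTENT_BIO_COMPRESSED while the compression type lives in the bits above EXTENT_BIO_FLAG_SHIFT. The helpers can be exercised outside the kernel as in this sketch (a plain unsigned long stands in for the kernel's bio_flags):

#include <assert.h>
#include <stdio.h>

#define EXTENT_BIO_COMPRESSED  1
#define EXTENT_BIO_FLAG_SHIFT  16

enum { COMPRESS_NONE = 0, COMPRESS_ZLIB = 1, COMPRESS_LZO = 2 };

/* same shape as the new inline helpers in extent_io.h */
static inline void extent_set_compress_type(unsigned long *bio_flags,
					    int compress_type)
{
	*bio_flags |= (unsigned long)compress_type << EXTENT_BIO_FLAG_SHIFT;
}

static inline int extent_compress_type(unsigned long bio_flags)
{
	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
}

int main(void)
{
	unsigned long bio_flags = 0;

	/* mark the bio compressed and record which algorithm was used */
	bio_flags |= EXTENT_BIO_COMPRESSED;
	extent_set_compress_type(&bio_flags, COMPRESS_LZO);

	assert(bio_flags & EXTENT_BIO_COMPRESSED);
	assert(extent_compress_type(bio_flags) == COMPRESS_LZO);
	printf("type %d\n", extent_compress_type(bio_flags));
	return 0;
}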
+2
fs/btrfs/extent_map.c
···
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include "ctree.h"
 #include "extent_map.h"
 
···
 		return em;
 	em->in_tree = 0;
 	em->flags = 0;
+	em->compress_type = BTRFS_COMPRESS_NONE;
 	atomic_set(&em->refs, 1);
 	return em;
 }
+2 -1
fs/btrfs/extent_map.h
···
 	unsigned long flags;
 	struct block_device *bdev;
 	atomic_t refs;
-	int in_tree;
+	unsigned int in_tree:1;
+	unsigned int compress_type:4;
 };
 
 struct extent_map_tree {
+13
fs/btrfs/file.c
···
 
 	split->bdev = em->bdev;
 	split->flags = flags;
+	split->compress_type = em->compress_type;
 	ret = add_extent_mapping(em_tree, split);
 	BUG_ON(ret);
 	free_extent_map(split);
···
 		split->len = em->start + em->len - (start + len);
 		split->bdev = em->bdev;
 		split->flags = flags;
+		split->compress_type = em->compress_type;
 
 		if (compressed) {
 			split->block_len = em->block_len;
···
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
+
+	/*
+	 * If BTRFS flips readonly due to some impossible error
+	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+	 * although we have opened a file as writable, we have
+	 * to stop this write operation to ensure FS consistency.
+	 */
+	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+		err = -EROFS;
+		goto out;
+	}
 
 	file_update_time(file);
 	BTRFS_I(inode)->sequence++;
+59 -31
fs/btrfs/inode.c
··· 122 size_t cur_size = size; 123 size_t datasize; 124 unsigned long offset; 125 - int use_compress = 0; 126 127 if (compressed_size && compressed_pages) { 128 - use_compress = 1; 129 cur_size = compressed_size; 130 } 131 ··· 159 btrfs_set_file_extent_ram_bytes(leaf, ei, size); 160 ptr = btrfs_file_extent_inline_start(ei); 161 162 - if (use_compress) { 163 struct page *cpage; 164 int i = 0; 165 while (compressed_size > 0) { ··· 176 compressed_size -= cur_size; 177 } 178 btrfs_set_file_extent_compression(leaf, ei, 179 - BTRFS_COMPRESS_ZLIB); 180 } else { 181 page = find_get_page(inode->i_mapping, 182 start >> PAGE_CACHE_SHIFT); ··· 263 u64 compressed_size; 264 struct page **pages; 265 unsigned long nr_pages; 266 struct list_head list; 267 }; 268 ··· 281 u64 start, u64 ram_size, 282 u64 compressed_size, 283 struct page **pages, 284 - unsigned long nr_pages) 285 { 286 struct async_extent *async_extent; 287 ··· 292 async_extent->compressed_size = compressed_size; 293 async_extent->pages = pages; 294 async_extent->nr_pages = nr_pages; 295 list_add_tail(&async_extent->list, &cow->extents); 296 return 0; 297 } ··· 335 unsigned long max_uncompressed = 128 * 1024; 336 int i; 337 int will_compress; 338 339 actual_end = min_t(u64, isize, end + 1); 340 again: ··· 385 WARN_ON(pages); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 387 388 - ret = btrfs_zlib_compress_pages(inode->i_mapping, start, 389 - total_compressed, pages, 390 - nr_pages, &nr_pages_ret, 391 - &total_in, 392 - &total_compressed, 393 - max_compressed); 394 395 if (!ret) { 396 unsigned long offset = total_compressed & ··· 501 * and will submit them to the elevator. 502 */ 503 add_async_extent(async_cow, start, num_bytes, 504 - total_compressed, pages, nr_pages_ret); 505 506 if (start + num_bytes < end) { 507 start += num_bytes; ··· 524 __set_page_dirty_nobuffers(locked_page); 525 /* unlocked later on in the async handlers */ 526 } 527 - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); 528 *num_added += 1; 529 } 530 ··· 650 em->block_start = ins.objectid; 651 em->block_len = ins.offset; 652 em->bdev = root->fs_info->fs_devices->latest_bdev; 653 set_bit(EXTENT_FLAG_PINNED, &em->flags); 654 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 655 ··· 667 async_extent->ram_size - 1, 0); 668 } 669 670 - ret = btrfs_add_ordered_extent(inode, async_extent->start, 671 - ins.objectid, 672 - async_extent->ram_size, 673 - ins.offset, 674 - BTRFS_ORDERED_COMPRESSED); 675 BUG_ON(ret); 676 677 /* ··· 1683 struct btrfs_ordered_extent *ordered_extent = NULL; 1684 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1685 struct extent_state *cached_state = NULL; 1686 - int compressed = 0; 1687 int ret; 1688 bool nolock = false; 1689 ··· 1724 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1725 1726 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1727 - compressed = 1; 1728 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1729 - BUG_ON(compressed); 1730 ret = btrfs_mark_extent_written(trans, inode, 1731 ordered_extent->file_offset, 1732 ordered_extent->file_offset + ··· 1740 ordered_extent->disk_len, 1741 ordered_extent->len, 1742 ordered_extent->len, 1743 - compressed, 0, 0, 1744 BTRFS_FILE_EXTENT_REG); 1745 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1746 ordered_extent->file_offset, ··· 1842 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 1843 logical = em->block_start; 1844 failrec->bio_flags = EXTENT_BIO_COMPRESSED; 1845 } 1846 failrec->logical = logical; 1847 
free_extent_map(em); ··· 3686 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3687 { 3688 struct inode *inode = dentry->d_inode; 3689 int err; 3690 3691 err = inode_change_ok(inode, attr); 3692 if (err) ··· 4947 size_t max_size; 4948 unsigned long inline_size; 4949 unsigned long ptr; 4950 4951 WARN_ON(pg_offset != 0); 4952 max_size = btrfs_file_extent_ram_bytes(leaf, item); 4953 inline_size = btrfs_file_extent_inline_item_len(leaf, 4954 btrfs_item_nr(leaf, path->slots[0])); ··· 4960 read_extent_buffer(leaf, tmp, ptr, inline_size); 4961 4962 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); 4963 - ret = btrfs_zlib_decompress(tmp, page, extent_offset, 4964 - inline_size, max_size); 4965 if (ret) { 4966 char *kaddr = kmap_atomic(page, KM_USER0); 4967 unsigned long copy_size = min_t(u64, ··· 5003 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5004 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5005 struct btrfs_trans_handle *trans = NULL; 5006 - int compressed; 5007 5008 again: 5009 read_lock(&em_tree->lock); ··· 5062 5063 found_type = btrfs_file_extent_type(leaf, item); 5064 extent_start = found_key.offset; 5065 - compressed = btrfs_file_extent_compression(leaf, item); 5066 if (found_type == BTRFS_FILE_EXTENT_REG || 5067 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5068 extent_end = extent_start + ··· 5108 em->block_start = EXTENT_MAP_HOLE; 5109 goto insert; 5110 } 5111 - if (compressed) { 5112 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5113 em->block_start = bytenr; 5114 em->block_len = btrfs_file_extent_disk_num_bytes(leaf, 5115 item); ··· 5144 em->len = (copy_size + root->sectorsize - 1) & 5145 ~((u64)root->sectorsize - 1); 5146 em->orig_start = EXTENT_MAP_INLINE; 5147 - if (compressed) 5148 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5149 ptr = btrfs_file_extent_inline_start(item) + extent_offset; 5150 if (create == 0 && !PageUptodate(page)) { 5151 - if (btrfs_file_extent_compression(leaf, item) == 5152 - BTRFS_COMPRESS_ZLIB) { 5153 ret = uncompress_inline(path, inode, page, 5154 pg_offset, 5155 extent_offset, item); ··· 6501 ei->ordered_data_close = 0; 6502 ei->orphan_meta_reserved = 0; 6503 ei->dummy_inode = 0; 6504 - ei->force_compress = 0; 6505 6506 inode = &ei->vfs_inode; 6507 extent_map_tree_init(&ei->extent_tree, GFP_NOFS); ··· 7129 7130 static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) 7131 { 7132 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7133 return -EACCES; 7134 return generic_permission(inode, mask, flags, btrfs_check_acl);
··· 122 size_t cur_size = size; 123 size_t datasize; 124 unsigned long offset; 125 + int compress_type = BTRFS_COMPRESS_NONE; 126 127 if (compressed_size && compressed_pages) { 128 + compress_type = root->fs_info->compress_type; 129 cur_size = compressed_size; 130 } 131 ··· 159 btrfs_set_file_extent_ram_bytes(leaf, ei, size); 160 ptr = btrfs_file_extent_inline_start(ei); 161 162 + if (compress_type != BTRFS_COMPRESS_NONE) { 163 struct page *cpage; 164 int i = 0; 165 while (compressed_size > 0) { ··· 176 compressed_size -= cur_size; 177 } 178 btrfs_set_file_extent_compression(leaf, ei, 179 + compress_type); 180 } else { 181 page = find_get_page(inode->i_mapping, 182 start >> PAGE_CACHE_SHIFT); ··· 263 u64 compressed_size; 264 struct page **pages; 265 unsigned long nr_pages; 266 + int compress_type; 267 struct list_head list; 268 }; 269 ··· 280 u64 start, u64 ram_size, 281 u64 compressed_size, 282 struct page **pages, 283 + unsigned long nr_pages, 284 + int compress_type) 285 { 286 struct async_extent *async_extent; 287 ··· 290 async_extent->compressed_size = compressed_size; 291 async_extent->pages = pages; 292 async_extent->nr_pages = nr_pages; 293 + async_extent->compress_type = compress_type; 294 list_add_tail(&async_extent->list, &cow->extents); 295 return 0; 296 } ··· 332 unsigned long max_uncompressed = 128 * 1024; 333 int i; 334 int will_compress; 335 + int compress_type = root->fs_info->compress_type; 336 337 actual_end = min_t(u64, isize, end + 1); 338 again: ··· 381 WARN_ON(pages); 382 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 383 384 + if (BTRFS_I(inode)->force_compress) 385 + compress_type = BTRFS_I(inode)->force_compress; 386 + 387 + ret = btrfs_compress_pages(compress_type, 388 + inode->i_mapping, start, 389 + total_compressed, pages, 390 + nr_pages, &nr_pages_ret, 391 + &total_in, 392 + &total_compressed, 393 + max_compressed); 394 395 if (!ret) { 396 unsigned long offset = total_compressed & ··· 493 * and will submit them to the elevator. 
494 */ 495 add_async_extent(async_cow, start, num_bytes, 496 + total_compressed, pages, nr_pages_ret, 497 + compress_type); 498 499 if (start + num_bytes < end) { 500 start += num_bytes; ··· 515 __set_page_dirty_nobuffers(locked_page); 516 /* unlocked later on in the async handlers */ 517 } 518 + add_async_extent(async_cow, start, end - start + 1, 519 + 0, NULL, 0, BTRFS_COMPRESS_NONE); 520 *num_added += 1; 521 } 522 ··· 640 em->block_start = ins.objectid; 641 em->block_len = ins.offset; 642 em->bdev = root->fs_info->fs_devices->latest_bdev; 643 + em->compress_type = async_extent->compress_type; 644 set_bit(EXTENT_FLAG_PINNED, &em->flags); 645 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 646 ··· 656 async_extent->ram_size - 1, 0); 657 } 658 659 + ret = btrfs_add_ordered_extent_compress(inode, 660 + async_extent->start, 661 + ins.objectid, 662 + async_extent->ram_size, 663 + ins.offset, 664 + BTRFS_ORDERED_COMPRESSED, 665 + async_extent->compress_type); 666 BUG_ON(ret); 667 668 /* ··· 1670 struct btrfs_ordered_extent *ordered_extent = NULL; 1671 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1672 struct extent_state *cached_state = NULL; 1673 + int compress_type = 0; 1674 int ret; 1675 bool nolock = false; 1676 ··· 1711 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1712 1713 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1714 + compress_type = ordered_extent->compress_type; 1715 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1716 + BUG_ON(compress_type); 1717 ret = btrfs_mark_extent_written(trans, inode, 1718 ordered_extent->file_offset, 1719 ordered_extent->file_offset + ··· 1727 ordered_extent->disk_len, 1728 ordered_extent->len, 1729 ordered_extent->len, 1730 + compress_type, 0, 0, 1731 BTRFS_FILE_EXTENT_REG); 1732 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1733 ordered_extent->file_offset, ··· 1829 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 1830 logical = em->block_start; 1831 failrec->bio_flags = EXTENT_BIO_COMPRESSED; 1832 + extent_set_compress_type(&failrec->bio_flags, 1833 + em->compress_type); 1834 } 1835 failrec->logical = logical; 1836 free_extent_map(em); ··· 3671 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3672 { 3673 struct inode *inode = dentry->d_inode; 3674 + struct btrfs_root *root = BTRFS_I(inode)->root; 3675 int err; 3676 + 3677 + if (btrfs_root_readonly(root)) 3678 + return -EROFS; 3679 3680 err = inode_change_ok(inode, attr); 3681 if (err) ··· 4928 size_t max_size; 4929 unsigned long inline_size; 4930 unsigned long ptr; 4931 + int compress_type; 4932 4933 WARN_ON(pg_offset != 0); 4934 + compress_type = btrfs_file_extent_compression(leaf, item); 4935 max_size = btrfs_file_extent_ram_bytes(leaf, item); 4936 inline_size = btrfs_file_extent_inline_item_len(leaf, 4937 btrfs_item_nr(leaf, path->slots[0])); ··· 4939 read_extent_buffer(leaf, tmp, ptr, inline_size); 4940 4941 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); 4942 + ret = btrfs_decompress(compress_type, tmp, page, 4943 + extent_offset, inline_size, max_size); 4944 if (ret) { 4945 char *kaddr = kmap_atomic(page, KM_USER0); 4946 unsigned long copy_size = min_t(u64, ··· 4982 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4983 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4984 struct btrfs_trans_handle *trans = NULL; 4985 + int compress_type; 4986 4987 again: 4988 read_lock(&em_tree->lock); ··· 5041 5042 found_type = btrfs_file_extent_type(leaf, item); 5043 extent_start = 
found_key.offset; 5044 + compress_type = btrfs_file_extent_compression(leaf, item); 5045 if (found_type == BTRFS_FILE_EXTENT_REG || 5046 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5047 extent_end = extent_start + ··· 5087 em->block_start = EXTENT_MAP_HOLE; 5088 goto insert; 5089 } 5090 + if (compress_type != BTRFS_COMPRESS_NONE) { 5091 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5092 + em->compress_type = compress_type; 5093 em->block_start = bytenr; 5094 em->block_len = btrfs_file_extent_disk_num_bytes(leaf, 5095 item); ··· 5122 em->len = (copy_size + root->sectorsize - 1) & 5123 ~((u64)root->sectorsize - 1); 5124 em->orig_start = EXTENT_MAP_INLINE; 5125 + if (compress_type) { 5126 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5127 + em->compress_type = compress_type; 5128 + } 5129 ptr = btrfs_file_extent_inline_start(item) + extent_offset; 5130 if (create == 0 && !PageUptodate(page)) { 5131 + if (btrfs_file_extent_compression(leaf, item) != 5132 + BTRFS_COMPRESS_NONE) { 5133 ret = uncompress_inline(path, inode, page, 5134 pg_offset, 5135 extent_offset, item); ··· 6477 ei->ordered_data_close = 0; 6478 ei->orphan_meta_reserved = 0; 6479 ei->dummy_inode = 0; 6480 + ei->force_compress = BTRFS_COMPRESS_NONE; 6481 6482 inode = &ei->vfs_inode; 6483 extent_map_tree_init(&ei->extent_tree, GFP_NOFS); ··· 7105 7106 static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) 7107 { 7108 + struct btrfs_root *root = BTRFS_I(inode)->root; 7109 + 7110 + if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) 7111 + return -EROFS; 7112 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7113 return -EACCES; 7114 return generic_permission(inode, mask, flags, btrfs_check_acl);
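Note on the inode.c plumbing above: the per-inode force_compress field and the fs-wide fs_info->compress_type now both carry a compression type rather than a zlib on/off flag, and the per-extent choice is the simple override-if-set rule visible just before the btrfs_compress_pages() call. A minimal sketch of that rule, not kernel code; the constant values are assumptions mirroring the usual ctree.h definitions (BTRFS_COMPRESS_NONE = 0, _ZLIB = 1, _LZO = 2), which are not part of this hunk:

    static int pick_compress_type(int fs_default, int force_compress)
    {
            /* a non-zero per-inode force_compress (set, for example, by a
             * compressing defrag) wins over the filesystem-wide default
             * taken from the mount options */
            return force_compress ? force_compress : fs_default;
    }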
+169 -51
fs/btrfs/ioctl.c
··· 147 unsigned int flags, oldflags; 148 int ret; 149 150 if (copy_from_user(&flags, arg, sizeof(flags))) 151 return -EFAULT; 152 ··· 363 } 364 365 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 366 - char *name, int namelen, u64 *async_transid) 367 { 368 struct inode *inode; 369 struct dentry *parent; ··· 382 btrfs_init_block_rsv(&pending_snapshot->block_rsv); 383 pending_snapshot->dentry = dentry; 384 pending_snapshot->root = root; 385 386 trans = btrfs_start_transaction(root->fs_info->extent_root, 5); 387 if (IS_ERR(trans)) { ··· 514 static noinline int btrfs_mksubvol(struct path *parent, 515 char *name, int namelen, 516 struct btrfs_root *snap_src, 517 - u64 *async_transid) 518 { 519 struct inode *dir = parent->dentry->d_inode; 520 struct dentry *dentry; ··· 546 547 if (snap_src) { 548 error = create_snapshot(snap_src, dentry, 549 - name, namelen, async_transid); 550 } else { 551 error = create_subvol(BTRFS_I(dir)->root, dentry, 552 name, namelen, async_transid); ··· 643 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 644 struct btrfs_ordered_extent *ordered; 645 struct page *page; 646 unsigned long last_index; 647 unsigned long ra_pages = root->fs_info->bdi.ra_pages; 648 unsigned long total_read = 0; 649 u64 page_start; 650 u64 page_end; 651 u64 last_len = 0; ··· 655 u64 defrag_end = 0; 656 unsigned long i; 657 int ret; 658 659 if (inode->i_size == 0) 660 return 0; ··· 698 total_read++; 699 mutex_lock(&inode->i_mutex); 700 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 701 - BTRFS_I(inode)->force_compress = 1; 702 703 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 704 if (ret) ··· 796 atomic_dec(&root->fs_info->async_submit_draining); 797 798 mutex_lock(&inode->i_mutex); 799 - BTRFS_I(inode)->force_compress = 0; 800 mutex_unlock(&inode->i_mutex); 801 } 802 803 return 0; ··· 923 char *name, 924 unsigned long fd, 925 int subvol, 926 - u64 *transid) 927 { 928 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 929 struct file *src_file; ··· 942 943 if (subvol) { 944 ret = btrfs_mksubvol(&file->f_path, name, namelen, 945 - NULL, transid); 946 } else { 947 struct inode *src_inode; 948 src_file = fget(fd); ··· 961 } 962 ret = btrfs_mksubvol(&file->f_path, name, namelen, 963 BTRFS_I(src_inode)->root, 964 - transid); 965 fput(src_file); 966 } 967 out: ··· 969 } 970 971 static noinline int btrfs_ioctl_snap_create(struct file *file, 972 - void __user *arg, int subvol, 973 - int v2) 974 { 975 - struct btrfs_ioctl_vol_args *vol_args = NULL; 976 - struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; 977 - char *name; 978 - u64 fd; 979 int ret; 980 981 - if (v2) { 982 - u64 transid = 0; 983 - u64 *ptr = NULL; 984 985 - vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); 986 - if (IS_ERR(vol_args_v2)) 987 - return PTR_ERR(vol_args_v2); 988 989 - if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { 990 - ret = -EINVAL; 991 - goto out; 992 - } 993 994 - name = vol_args_v2->name; 995 - fd = vol_args_v2->fd; 996 - vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 997 998 - if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) 999 - ptr = &transid; 1000 1001 - ret = btrfs_ioctl_snap_create_transid(file, name, fd, 1002 - subvol, ptr); 1003 - 1004 - if (ret == 0 && ptr && 1005 - copy_to_user(arg + 1006 - offsetof(struct btrfs_ioctl_vol_args_v2, 1007 - transid), ptr, sizeof(*ptr))) 1008 - ret = -EFAULT; 1009 - } else { 1010 - vol_args = memdup_user(arg, sizeof(*vol_args)); 1011 - if (IS_ERR(vol_args)) 1012 - return PTR_ERR(vol_args); 1013 - 
name = vol_args->name; 1014 - fd = vol_args->fd; 1015 - vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1016 - 1017 - ret = btrfs_ioctl_snap_create_transid(file, name, fd, 1018 - subvol, NULL); 1019 } 1020 out: 1021 kfree(vol_args); 1022 - kfree(vol_args_v2); 1023 1024 return ret; 1025 } 1026 ··· 1613 struct btrfs_ioctl_defrag_range_args *range; 1614 int ret; 1615 1616 ret = mnt_want_write(file->f_path.mnt); 1617 if (ret) 1618 return ret; ··· 1743 /* the destination must be opened for writing */ 1744 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 1745 return -EINVAL; 1746 1747 ret = mnt_want_write(file->f_path.mnt); 1748 if (ret) ··· 2068 if (file->private_data) 2069 goto out; 2070 2071 ret = mnt_want_write(file->f_path.mnt); 2072 if (ret) 2073 goto out; ··· 2371 case FS_IOC_GETVERSION: 2372 return btrfs_ioctl_getversion(file, argp); 2373 case BTRFS_IOC_SNAP_CREATE: 2374 - return btrfs_ioctl_snap_create(file, argp, 0, 0); 2375 case BTRFS_IOC_SNAP_CREATE_V2: 2376 - return btrfs_ioctl_snap_create(file, argp, 0, 1); 2377 case BTRFS_IOC_SUBVOL_CREATE: 2378 - return btrfs_ioctl_snap_create(file, argp, 1, 0); 2379 case BTRFS_IOC_SNAP_DESTROY: 2380 return btrfs_ioctl_snap_destroy(file, argp); 2381 case BTRFS_IOC_DEFAULT_SUBVOL: 2382 return btrfs_ioctl_default_subvol(file, argp); 2383 case BTRFS_IOC_DEFRAG:
··· 147 unsigned int flags, oldflags; 148 int ret; 149 150 + if (btrfs_root_readonly(root)) 151 + return -EROFS; 152 + 153 if (copy_from_user(&flags, arg, sizeof(flags))) 154 return -EFAULT; 155 ··· 360 } 361 362 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 363 + char *name, int namelen, u64 *async_transid, 364 + bool readonly) 365 { 366 struct inode *inode; 367 struct dentry *parent; ··· 378 btrfs_init_block_rsv(&pending_snapshot->block_rsv); 379 pending_snapshot->dentry = dentry; 380 pending_snapshot->root = root; 381 + pending_snapshot->readonly = readonly; 382 383 trans = btrfs_start_transaction(root->fs_info->extent_root, 5); 384 if (IS_ERR(trans)) { ··· 509 static noinline int btrfs_mksubvol(struct path *parent, 510 char *name, int namelen, 511 struct btrfs_root *snap_src, 512 + u64 *async_transid, bool readonly) 513 { 514 struct inode *dir = parent->dentry->d_inode; 515 struct dentry *dentry; ··· 541 542 if (snap_src) { 543 error = create_snapshot(snap_src, dentry, 544 + name, namelen, async_transid, readonly); 545 } else { 546 error = create_subvol(BTRFS_I(dir)->root, dentry, 547 name, namelen, async_transid); ··· 638 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 639 struct btrfs_ordered_extent *ordered; 640 struct page *page; 641 + struct btrfs_super_block *disk_super; 642 unsigned long last_index; 643 unsigned long ra_pages = root->fs_info->bdi.ra_pages; 644 unsigned long total_read = 0; 645 + u64 features; 646 u64 page_start; 647 u64 page_end; 648 u64 last_len = 0; ··· 648 u64 defrag_end = 0; 649 unsigned long i; 650 int ret; 651 + int compress_type = BTRFS_COMPRESS_ZLIB; 652 + 653 + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 654 + if (range->compress_type > BTRFS_COMPRESS_TYPES) 655 + return -EINVAL; 656 + if (range->compress_type) 657 + compress_type = range->compress_type; 658 + } 659 660 if (inode->i_size == 0) 661 return 0; ··· 683 total_read++; 684 mutex_lock(&inode->i_mutex); 685 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 686 + BTRFS_I(inode)->force_compress = compress_type; 687 688 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 689 if (ret) ··· 781 atomic_dec(&root->fs_info->async_submit_draining); 782 783 mutex_lock(&inode->i_mutex); 784 + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 785 mutex_unlock(&inode->i_mutex); 786 + } 787 + 788 + disk_super = &root->fs_info->super_copy; 789 + features = btrfs_super_incompat_flags(disk_super); 790 + if (range->compress_type == BTRFS_COMPRESS_LZO) { 791 + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 792 + btrfs_set_super_incompat_flags(disk_super, features); 793 } 794 795 return 0; ··· 901 char *name, 902 unsigned long fd, 903 int subvol, 904 + u64 *transid, 905 + bool readonly) 906 { 907 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 908 struct file *src_file; ··· 919 920 if (subvol) { 921 ret = btrfs_mksubvol(&file->f_path, name, namelen, 922 + NULL, transid, readonly); 923 } else { 924 struct inode *src_inode; 925 src_file = fget(fd); ··· 938 } 939 ret = btrfs_mksubvol(&file->f_path, name, namelen, 940 BTRFS_I(src_inode)->root, 941 + transid, readonly); 942 fput(src_file); 943 } 944 out: ··· 946 } 947 948 static noinline int btrfs_ioctl_snap_create(struct file *file, 949 + void __user *arg, int subvol) 950 { 951 + struct btrfs_ioctl_vol_args *vol_args; 952 int ret; 953 954 + vol_args = memdup_user(arg, sizeof(*vol_args)); 955 + if (IS_ERR(vol_args)) 956 + return PTR_ERR(vol_args); 957 + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 
958 959 + ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 960 + vol_args->fd, subvol, 961 + NULL, false); 962 963 + kfree(vol_args); 964 + return ret; 965 + } 966 967 + static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 968 + void __user *arg, int subvol) 969 + { 970 + struct btrfs_ioctl_vol_args_v2 *vol_args; 971 + int ret; 972 + u64 transid = 0; 973 + u64 *ptr = NULL; 974 + bool readonly = false; 975 976 + vol_args = memdup_user(arg, sizeof(*vol_args)); 977 + if (IS_ERR(vol_args)) 978 + return PTR_ERR(vol_args); 979 + vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 980 981 + if (vol_args->flags & 982 + ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { 983 + ret = -EOPNOTSUPP; 984 + goto out; 985 } 986 + 987 + if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 988 + ptr = &transid; 989 + if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 990 + readonly = true; 991 + 992 + ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 993 + vol_args->fd, subvol, 994 + ptr, readonly); 995 + 996 + if (ret == 0 && ptr && 997 + copy_to_user(arg + 998 + offsetof(struct btrfs_ioctl_vol_args_v2, 999 + transid), ptr, sizeof(*ptr))) 1000 + ret = -EFAULT; 1001 out: 1002 kfree(vol_args); 1003 + return ret; 1004 + } 1005 1006 + static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1007 + void __user *arg) 1008 + { 1009 + struct inode *inode = fdentry(file)->d_inode; 1010 + struct btrfs_root *root = BTRFS_I(inode)->root; 1011 + int ret = 0; 1012 + u64 flags = 0; 1013 + 1014 + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1015 + return -EINVAL; 1016 + 1017 + down_read(&root->fs_info->subvol_sem); 1018 + if (btrfs_root_readonly(root)) 1019 + flags |= BTRFS_SUBVOL_RDONLY; 1020 + up_read(&root->fs_info->subvol_sem); 1021 + 1022 + if (copy_to_user(arg, &flags, sizeof(flags))) 1023 + ret = -EFAULT; 1024 + 1025 + return ret; 1026 + } 1027 + 1028 + static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1029 + void __user *arg) 1030 + { 1031 + struct inode *inode = fdentry(file)->d_inode; 1032 + struct btrfs_root *root = BTRFS_I(inode)->root; 1033 + struct btrfs_trans_handle *trans; 1034 + u64 root_flags; 1035 + u64 flags; 1036 + int ret = 0; 1037 + 1038 + if (root->fs_info->sb->s_flags & MS_RDONLY) 1039 + return -EROFS; 1040 + 1041 + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1042 + return -EINVAL; 1043 + 1044 + if (copy_from_user(&flags, arg, sizeof(flags))) 1045 + return -EFAULT; 1046 + 1047 + if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1048 + return -EINVAL; 1049 + 1050 + if (flags & ~BTRFS_SUBVOL_RDONLY) 1051 + return -EOPNOTSUPP; 1052 + 1053 + down_write(&root->fs_info->subvol_sem); 1054 + 1055 + /* nothing to do */ 1056 + if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) 1057 + goto out; 1058 + 1059 + root_flags = btrfs_root_flags(&root->root_item); 1060 + if (flags & BTRFS_SUBVOL_RDONLY) 1061 + btrfs_set_root_flags(&root->root_item, 1062 + root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1063 + else 1064 + btrfs_set_root_flags(&root->root_item, 1065 + root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1066 + 1067 + trans = btrfs_start_transaction(root, 1); 1068 + if (IS_ERR(trans)) { 1069 + ret = PTR_ERR(trans); 1070 + goto out_reset; 1071 + } 1072 + 1073 + ret = btrfs_update_root(trans, root, 1074 + &root->root_key, &root->root_item); 1075 + 1076 + btrfs_commit_transaction(trans, root); 1077 + out_reset: 1078 + if (ret) 1079 + btrfs_set_root_flags(&root->root_item, root_flags); 1080 + out: 1081 + up_write(&root->fs_info->subvol_sem); 1082 return ret; 1083 } 1084 
··· 1509 struct btrfs_ioctl_defrag_range_args *range; 1510 int ret; 1511 1512 + if (btrfs_root_readonly(root)) 1513 + return -EROFS; 1514 + 1515 ret = mnt_want_write(file->f_path.mnt); 1516 if (ret) 1517 return ret; ··· 1636 /* the destination must be opened for writing */ 1637 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 1638 return -EINVAL; 1639 + 1640 + if (btrfs_root_readonly(root)) 1641 + return -EROFS; 1642 1643 ret = mnt_want_write(file->f_path.mnt); 1644 if (ret) ··· 1958 if (file->private_data) 1959 goto out; 1960 1961 + ret = -EROFS; 1962 + if (btrfs_root_readonly(root)) 1963 + goto out; 1964 + 1965 ret = mnt_want_write(file->f_path.mnt); 1966 if (ret) 1967 goto out; ··· 2257 case FS_IOC_GETVERSION: 2258 return btrfs_ioctl_getversion(file, argp); 2259 case BTRFS_IOC_SNAP_CREATE: 2260 + return btrfs_ioctl_snap_create(file, argp, 0); 2261 case BTRFS_IOC_SNAP_CREATE_V2: 2262 + return btrfs_ioctl_snap_create_v2(file, argp, 0); 2263 case BTRFS_IOC_SUBVOL_CREATE: 2264 + return btrfs_ioctl_snap_create(file, argp, 1); 2265 case BTRFS_IOC_SNAP_DESTROY: 2266 return btrfs_ioctl_snap_destroy(file, argp); 2267 + case BTRFS_IOC_SUBVOL_GETFLAGS: 2268 + return btrfs_ioctl_subvol_getflags(file, argp); 2269 + case BTRFS_IOC_SUBVOL_SETFLAGS: 2270 + return btrfs_ioctl_subvol_setflags(file, argp); 2271 case BTRFS_IOC_DEFAULT_SUBVOL: 2272 return btrfs_ioctl_default_subvol(file, argp); 2273 case BTRFS_IOC_DEFRAG:
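With btrfs_ioctl_snap_create_v2() split out above, a snapshot can be created read-only in one step from user space. A hedged sketch using only the names declared in fs/btrfs/ioctl.h; the "ioctl.h" include stands for a userspace copy of that header, and the two descriptors are placeholders:

    #include <string.h>
    #include <sys/ioctl.h>
    #include "ioctl.h"      /* a userspace copy of fs/btrfs/ioctl.h */

    /*
     * Create a read-only snapshot named "name" of the subvolume open at
     * src_fd, placed in the directory open at dest_dir_fd.
     */
    static int create_ro_snapshot(int dest_dir_fd, int src_fd, const char *name)
    {
            struct btrfs_ioctl_vol_args_v2 args;

            memset(&args, 0, sizeof(args));
            args.fd = src_fd;
            args.flags = BTRFS_SUBVOL_RDONLY;
            strncpy(args.name, name, BTRFS_SUBVOL_NAME_MAX);

            return ioctl(dest_dir_fd, BTRFS_IOC_SNAP_CREATE_V2, &args);
    }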
+11 -1
fs/btrfs/ioctl.h
··· 31 }; 32 33 #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) 34 35 #define BTRFS_SUBVOL_NAME_MAX 4039 36 struct btrfs_ioctl_vol_args_v2 { ··· 134 */ 135 __u32 extent_thresh; 136 137 /* spare for later */ 138 - __u32 unused[5]; 139 }; 140 141 struct btrfs_ioctl_space_info { ··· 201 #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 202 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 203 struct btrfs_ioctl_vol_args_v2) 204 #endif
··· 31 }; 32 33 #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) 34 + #define BTRFS_SUBVOL_RDONLY (1ULL << 1) 35 36 #define BTRFS_SUBVOL_NAME_MAX 4039 37 struct btrfs_ioctl_vol_args_v2 { ··· 133 */ 134 __u32 extent_thresh; 135 136 + /* 137 + * which compression method to use if turning on compression 138 + * for this defrag operation. If unspecified, zlib will 139 + * be used 140 + */ 141 + __u32 compress_type; 142 + 143 /* spare for later */ 144 + __u32 unused[4]; 145 }; 146 147 struct btrfs_ioctl_space_info { ··· 193 #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 194 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 195 struct btrfs_ioctl_vol_args_v2) 196 + #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 197 + #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 198 #endif
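The two ioctls added above exchange a single __u64 of BTRFS_SUBVOL_* bits, and per the checks in btrfs_ioctl_subvol_getflags()/setflags() the descriptor has to refer to the subvolume's root directory. A hedged sketch of querying the new read-only bit from user space, again assuming a userspace copy of this header; BTRFS_IOC_SUBVOL_SETFLAGS takes the same __u64 back to change the bit:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include "ioctl.h"      /* a userspace copy of fs/btrfs/ioctl.h */

    /* return 1 if the subvolume open at subvol_fd is read-only,
     * 0 if it is writable, -1 on error */
    static int subvol_is_readonly(int subvol_fd)
    {
            uint64_t flags = 0;

            if (ioctl(subvol_fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
                    return -1;

            return !!(flags & BTRFS_SUBVOL_RDONLY);
    }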
+420
fs/btrfs/lzo.c
···
··· 1 + /* 2 + * Copyright (C) 2008 Oracle. All rights reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public 6 + * License v2 as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 + * General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public 14 + * License along with this program; if not, write to the 15 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 + * Boston, MA 021110-1307, USA. 17 + */ 18 + 19 + #include <linux/kernel.h> 20 + #include <linux/slab.h> 21 + #include <linux/vmalloc.h> 22 + #include <linux/init.h> 23 + #include <linux/err.h> 24 + #include <linux/sched.h> 25 + #include <linux/pagemap.h> 26 + #include <linux/bio.h> 27 + #include <linux/lzo.h> 28 + #include "compression.h" 29 + 30 + #define LZO_LEN 4 31 + 32 + struct workspace { 33 + void *mem; 34 + void *buf; /* where compressed data goes */ 35 + void *cbuf; /* where decompressed data goes */ 36 + struct list_head list; 37 + }; 38 + 39 + static void lzo_free_workspace(struct list_head *ws) 40 + { 41 + struct workspace *workspace = list_entry(ws, struct workspace, list); 42 + 43 + vfree(workspace->buf); 44 + vfree(workspace->cbuf); 45 + vfree(workspace->mem); 46 + kfree(workspace); 47 + } 48 + 49 + static struct list_head *lzo_alloc_workspace(void) 50 + { 51 + struct workspace *workspace; 52 + 53 + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 54 + if (!workspace) 55 + return ERR_PTR(-ENOMEM); 56 + 57 + workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); 58 + workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); 59 + workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); 60 + if (!workspace->mem || !workspace->buf || !workspace->cbuf) 61 + goto fail; 62 + 63 + INIT_LIST_HEAD(&workspace->list); 64 + 65 + return &workspace->list; 66 + fail: 67 + lzo_free_workspace(&workspace->list); 68 + return ERR_PTR(-ENOMEM); 69 + } 70 + 71 + static inline void write_compress_length(char *buf, size_t len) 72 + { 73 + __le32 dlen; 74 + 75 + dlen = cpu_to_le32(len); 76 + memcpy(buf, &dlen, LZO_LEN); 77 + } 78 + 79 + static inline size_t read_compress_length(char *buf) 80 + { 81 + __le32 dlen; 82 + 83 + memcpy(&dlen, buf, LZO_LEN); 84 + return le32_to_cpu(dlen); 85 + } 86 + 87 + static int lzo_compress_pages(struct list_head *ws, 88 + struct address_space *mapping, 89 + u64 start, unsigned long len, 90 + struct page **pages, 91 + unsigned long nr_dest_pages, 92 + unsigned long *out_pages, 93 + unsigned long *total_in, 94 + unsigned long *total_out, 95 + unsigned long max_out) 96 + { 97 + struct workspace *workspace = list_entry(ws, struct workspace, list); 98 + int ret = 0; 99 + char *data_in; 100 + char *cpage_out; 101 + int nr_pages = 0; 102 + struct page *in_page = NULL; 103 + struct page *out_page = NULL; 104 + unsigned long bytes_left; 105 + 106 + size_t in_len; 107 + size_t out_len; 108 + char *buf; 109 + unsigned long tot_in = 0; 110 + unsigned long tot_out = 0; 111 + unsigned long pg_bytes_left; 112 + unsigned long out_offset; 113 + unsigned long bytes; 114 + 115 + *out_pages = 0; 116 + *total_out = 0; 117 + *total_in = 0; 118 + 119 + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); 120 + data_in = kmap(in_page); 121 + 122 + /* 123 + * 
store the size of all chunks of compressed data in 124 + * the first 4 bytes 125 + */ 126 + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 127 + if (out_page == NULL) { 128 + ret = -ENOMEM; 129 + goto out; 130 + } 131 + cpage_out = kmap(out_page); 132 + out_offset = LZO_LEN; 133 + tot_out = LZO_LEN; 134 + pages[0] = out_page; 135 + nr_pages = 1; 136 + pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; 137 + 138 + /* compress at most one page of data each time */ 139 + in_len = min(len, PAGE_CACHE_SIZE); 140 + while (tot_in < len) { 141 + ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, 142 + &out_len, workspace->mem); 143 + if (ret != LZO_E_OK) { 144 + printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", 145 + ret); 146 + ret = -1; 147 + goto out; 148 + } 149 + 150 + /* store the size of this chunk of compressed data */ 151 + write_compress_length(cpage_out + out_offset, out_len); 152 + tot_out += LZO_LEN; 153 + out_offset += LZO_LEN; 154 + pg_bytes_left -= LZO_LEN; 155 + 156 + tot_in += in_len; 157 + tot_out += out_len; 158 + 159 + /* copy bytes from the working buffer into the pages */ 160 + buf = workspace->cbuf; 161 + while (out_len) { 162 + bytes = min_t(unsigned long, pg_bytes_left, out_len); 163 + 164 + memcpy(cpage_out + out_offset, buf, bytes); 165 + 166 + out_len -= bytes; 167 + pg_bytes_left -= bytes; 168 + buf += bytes; 169 + out_offset += bytes; 170 + 171 + /* 172 + * we need another page for writing out. 173 + * 174 + * Note if there's less than 4 bytes left, we just 175 + * skip to a new page. 176 + */ 177 + if ((out_len == 0 && pg_bytes_left < LZO_LEN) || 178 + pg_bytes_left == 0) { 179 + if (pg_bytes_left) { 180 + memset(cpage_out + out_offset, 0, 181 + pg_bytes_left); 182 + tot_out += pg_bytes_left; 183 + } 184 + 185 + /* we're done, don't allocate new page */ 186 + if (out_len == 0 && tot_in >= len) 187 + break; 188 + 189 + kunmap(out_page); 190 + if (nr_pages == nr_dest_pages) { 191 + out_page = NULL; 192 + ret = -1; 193 + goto out; 194 + } 195 + 196 + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 197 + if (out_page == NULL) { 198 + ret = -ENOMEM; 199 + goto out; 200 + } 201 + cpage_out = kmap(out_page); 202 + pages[nr_pages++] = out_page; 203 + 204 + pg_bytes_left = PAGE_CACHE_SIZE; 205 + out_offset = 0; 206 + } 207 + } 208 + 209 + /* we're making it bigger, give up */ 210 + if (tot_in > 8192 && tot_in < tot_out) 211 + goto out; 212 + 213 + /* we're all done */ 214 + if (tot_in >= len) 215 + break; 216 + 217 + if (tot_out > max_out) 218 + break; 219 + 220 + bytes_left = len - tot_in; 221 + kunmap(in_page); 222 + page_cache_release(in_page); 223 + 224 + start += PAGE_CACHE_SIZE; 225 + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); 226 + data_in = kmap(in_page); 227 + in_len = min(bytes_left, PAGE_CACHE_SIZE); 228 + } 229 + 230 + if (tot_out > tot_in) 231 + goto out; 232 + 233 + /* store the size of all chunks of compressed data */ 234 + cpage_out = kmap(pages[0]); 235 + write_compress_length(cpage_out, tot_out); 236 + 237 + kunmap(pages[0]); 238 + 239 + ret = 0; 240 + *total_out = tot_out; 241 + *total_in = tot_in; 242 + out: 243 + *out_pages = nr_pages; 244 + if (out_page) 245 + kunmap(out_page); 246 + 247 + if (in_page) { 248 + kunmap(in_page); 249 + page_cache_release(in_page); 250 + } 251 + 252 + return ret; 253 + } 254 + 255 + static int lzo_decompress_biovec(struct list_head *ws, 256 + struct page **pages_in, 257 + u64 disk_start, 258 + struct bio_vec *bvec, 259 + int vcnt, 260 + size_t srclen) 261 + { 262 + struct workspace *workspace = 
list_entry(ws, struct workspace, list); 263 + int ret = 0, ret2; 264 + char *data_in; 265 + unsigned long page_in_index = 0; 266 + unsigned long page_out_index = 0; 267 + unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / 268 + PAGE_CACHE_SIZE; 269 + unsigned long buf_start; 270 + unsigned long buf_offset = 0; 271 + unsigned long bytes; 272 + unsigned long working_bytes; 273 + unsigned long pg_offset; 274 + 275 + size_t in_len; 276 + size_t out_len; 277 + unsigned long in_offset; 278 + unsigned long in_page_bytes_left; 279 + unsigned long tot_in; 280 + unsigned long tot_out; 281 + unsigned long tot_len; 282 + char *buf; 283 + 284 + data_in = kmap(pages_in[0]); 285 + tot_len = read_compress_length(data_in); 286 + 287 + tot_in = LZO_LEN; 288 + in_offset = LZO_LEN; 289 + tot_len = min_t(size_t, srclen, tot_len); 290 + in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; 291 + 292 + tot_out = 0; 293 + pg_offset = 0; 294 + 295 + while (tot_in < tot_len) { 296 + in_len = read_compress_length(data_in + in_offset); 297 + in_page_bytes_left -= LZO_LEN; 298 + in_offset += LZO_LEN; 299 + tot_in += LZO_LEN; 300 + 301 + tot_in += in_len; 302 + working_bytes = in_len; 303 + 304 + /* fast path: avoid using the working buffer */ 305 + if (in_page_bytes_left >= in_len) { 306 + buf = data_in + in_offset; 307 + bytes = in_len; 308 + goto cont; 309 + } 310 + 311 + /* copy bytes from the pages into the working buffer */ 312 + buf = workspace->cbuf; 313 + buf_offset = 0; 314 + while (working_bytes) { 315 + bytes = min(working_bytes, in_page_bytes_left); 316 + 317 + memcpy(buf + buf_offset, data_in + in_offset, bytes); 318 + buf_offset += bytes; 319 + cont: 320 + working_bytes -= bytes; 321 + in_page_bytes_left -= bytes; 322 + in_offset += bytes; 323 + 324 + /* check if we need to pick another page */ 325 + if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) 326 + || in_page_bytes_left == 0) { 327 + tot_in += in_page_bytes_left; 328 + 329 + if (working_bytes == 0 && tot_in >= tot_len) 330 + break; 331 + 332 + kunmap(pages_in[page_in_index]); 333 + page_in_index++; 334 + if (page_in_index >= total_pages_in) { 335 + ret = -1; 336 + data_in = NULL; 337 + goto done; 338 + } 339 + data_in = kmap(pages_in[page_in_index]); 340 + 341 + in_page_bytes_left = PAGE_CACHE_SIZE; 342 + in_offset = 0; 343 + } 344 + } 345 + 346 + out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 347 + ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 348 + &out_len); 349 + if (ret != LZO_E_OK) { 350 + printk(KERN_WARNING "btrfs decompress failed\n"); 351 + ret = -1; 352 + break; 353 + } 354 + 355 + buf_start = tot_out; 356 + tot_out += out_len; 357 + 358 + ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, 359 + tot_out, disk_start, 360 + bvec, vcnt, 361 + &page_out_index, &pg_offset); 362 + if (ret2 == 0) 363 + break; 364 + } 365 + done: 366 + if (data_in) 367 + kunmap(pages_in[page_in_index]); 368 + return ret; 369 + } 370 + 371 + static int lzo_decompress(struct list_head *ws, unsigned char *data_in, 372 + struct page *dest_page, 373 + unsigned long start_byte, 374 + size_t srclen, size_t destlen) 375 + { 376 + struct workspace *workspace = list_entry(ws, struct workspace, list); 377 + size_t in_len; 378 + size_t out_len; 379 + size_t tot_len; 380 + int ret = 0; 381 + char *kaddr; 382 + unsigned long bytes; 383 + 384 + BUG_ON(srclen < LZO_LEN); 385 + 386 + tot_len = read_compress_length(data_in); 387 + data_in += LZO_LEN; 388 + 389 + in_len = read_compress_length(data_in); 390 + data_in += LZO_LEN; 391 + 392 + 
out_len = PAGE_CACHE_SIZE; 393 + ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); 394 + if (ret != LZO_E_OK) { 395 + printk(KERN_WARNING "btrfs decompress failed!\n"); 396 + ret = -1; 397 + goto out; 398 + } 399 + 400 + if (out_len < start_byte) { 401 + ret = -1; 402 + goto out; 403 + } 404 + 405 + bytes = min_t(unsigned long, destlen, out_len - start_byte); 406 + 407 + kaddr = kmap_atomic(dest_page, KM_USER0); 408 + memcpy(kaddr, workspace->buf + start_byte, bytes); 409 + kunmap_atomic(kaddr, KM_USER0); 410 + out: 411 + return ret; 412 + } 413 + 414 + struct btrfs_compress_op btrfs_lzo_compress = { 415 + .alloc_workspace = lzo_alloc_workspace, 416 + .free_workspace = lzo_free_workspace, 417 + .compress_pages = lzo_compress_pages, 418 + .decompress_biovec = lzo_decompress_biovec, 419 + .decompress = lzo_decompress, 420 + };
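For reference, the framing produced by lzo_compress_pages() above is a 4-byte little-endian length for the whole compressed stream, followed by each compressed chunk prefixed with its own 4-byte length; a length header is never split across a page boundary. A hedged sketch of walking that framing in a flat buffer (real extents are spread over pages, which this ignores):

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define LZO_LEN 4

    /* mirrors read_compress_length(): lengths are little-endian on disk */
    static uint32_t read_len(const unsigned char *p)
    {
            uint32_t v;

            memcpy(&v, p, LZO_LEN);
            return le32toh(v);
    }

    /*
     * Walk the headers written by lzo_compress_pages(): a total length
     * first, then (length, payload) pairs.  Page padding is not handled,
     * so this only holds for a stream that fits in a single page.
     */
    static void dump_lzo_segments(const unsigned char *buf)
    {
            uint32_t total = read_len(buf);
            uint32_t off = LZO_LEN;

            while (off + LZO_LEN <= total) {
                    uint32_t len = read_len(buf + off);

                    off += LZO_LEN;
                    printf("compressed chunk: %u bytes\n", len);
                    off += len;
            }
    }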
+15 -3
fs/btrfs/ordered-data.c
··· 172 */ 173 static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 174 u64 start, u64 len, u64 disk_len, 175 - int type, int dio) 176 { 177 struct btrfs_ordered_inode_tree *tree; 178 struct rb_node *node; ··· 189 entry->disk_len = disk_len; 190 entry->bytes_left = len; 191 entry->inode = inode; 192 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 193 set_bit(type, &entry->flags); 194 ··· 221 u64 start, u64 len, u64 disk_len, int type) 222 { 223 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 224 - disk_len, type, 0); 225 } 226 227 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 228 u64 start, u64 len, u64 disk_len, int type) 229 { 230 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 231 - disk_len, type, 1); 232 } 233 234 /*
··· 172 */ 173 static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 174 u64 start, u64 len, u64 disk_len, 175 + int type, int dio, int compress_type) 176 { 177 struct btrfs_ordered_inode_tree *tree; 178 struct rb_node *node; ··· 189 entry->disk_len = disk_len; 190 entry->bytes_left = len; 191 entry->inode = inode; 192 + entry->compress_type = compress_type; 193 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 194 set_bit(type, &entry->flags); 195 ··· 220 u64 start, u64 len, u64 disk_len, int type) 221 { 222 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 223 + disk_len, type, 0, 224 + BTRFS_COMPRESS_NONE); 225 } 226 227 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 228 u64 start, u64 len, u64 disk_len, int type) 229 { 230 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 231 + disk_len, type, 1, 232 + BTRFS_COMPRESS_NONE); 233 + } 234 + 235 + int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 236 + u64 start, u64 len, u64 disk_len, 237 + int type, int compress_type) 238 + { 239 + return __btrfs_add_ordered_extent(inode, file_offset, start, len, 240 + disk_len, type, 0, 241 + compress_type); 242 } 243 244 /*
+7 -1
fs/btrfs/ordered-data.h
··· 68 69 #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ 70 71 - #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ 72 73 #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ 74 ··· 92 93 /* flags (described above) */ 94 unsigned long flags; 95 96 /* reference count */ 97 atomic_t refs; ··· 151 u64 start, u64 len, u64 disk_len, int type); 152 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 153 u64 start, u64 len, u64 disk_len, int type); 154 int btrfs_add_ordered_sum(struct inode *inode, 155 struct btrfs_ordered_extent *entry, 156 struct btrfs_ordered_sum *sum);
··· 68 69 #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ 70 71 + #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ 72 73 #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ 74 ··· 92 93 /* flags (described above) */ 94 unsigned long flags; 95 + 96 + /* compression algorithm */ 97 + int compress_type; 98 99 /* reference count */ 100 atomic_t refs; ··· 148 u64 start, u64 len, u64 disk_len, int type); 149 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 150 u64 start, u64 len, u64 disk_len, int type); 151 + int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 152 + u64 start, u64 len, u64 disk_len, 153 + int type, int compress_type); 154 int btrfs_add_ordered_sum(struct inode *inode, 155 struct btrfs_ordered_extent *entry, 156 struct btrfs_ordered_sum *sum);
+263 -18
fs/btrfs/super.c
··· 54 55 static const struct super_operations btrfs_super_ops; 56 57 static void btrfs_put_super(struct super_block *sb) 58 { 59 struct btrfs_root *root = btrfs_sb(sb); ··· 153 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, 154 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 156 - Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 157 - Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, 158 - Opt_user_subvol_rm_allowed, 159 }; 160 161 static match_table_t tokens = { ··· 170 {Opt_alloc_start, "alloc_start=%s"}, 171 {Opt_thread_pool, "thread_pool=%d"}, 172 {Opt_compress, "compress"}, 173 {Opt_compress_force, "compress-force"}, 174 {Opt_ssd, "ssd"}, 175 {Opt_ssd_spread, "ssd_spread"}, 176 {Opt_nossd, "nossd"}, ··· 198 char *p, *num, *orig; 199 int intarg; 200 int ret = 0; 201 202 if (!options) 203 return 0; ··· 242 btrfs_set_opt(info->mount_opt, NODATACOW); 243 btrfs_set_opt(info->mount_opt, NODATASUM); 244 break; 245 - case Opt_compress: 246 - printk(KERN_INFO "btrfs: use compression\n"); 247 - btrfs_set_opt(info->mount_opt, COMPRESS); 248 - break; 249 case Opt_compress_force: 250 - printk(KERN_INFO "btrfs: forcing compression\n"); 251 - btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 252 btrfs_set_opt(info->mount_opt, COMPRESS); 253 break; 254 case Opt_ssd: 255 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); ··· 859 return 0; 860 } 861 862 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 863 { 864 struct btrfs_root *root = btrfs_sb(dentry->d_sb); ··· 987 struct list_head *head = &root->fs_info->space_info; 988 struct btrfs_space_info *found; 989 u64 total_used = 0; 990 - u64 total_used_data = 0; 991 int bits = dentry->d_sb->s_blocksize_bits; 992 __be32 *fsid = (__be32 *)root->fs_info->fsid; 993 994 rcu_read_lock(); 995 list_for_each_entry_rcu(found, head, list) { 996 - if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | 997 - BTRFS_BLOCK_GROUP_SYSTEM)) 998 - total_used_data += found->disk_total; 999 - else 1000 - total_used_data += found->disk_used; 1001 total_used += found->disk_used; 1002 } 1003 rcu_read_unlock(); ··· 1009 buf->f_namelen = BTRFS_NAME_LEN; 1010 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1011 buf->f_bfree = buf->f_blocks - (total_used >> bits); 1012 - buf->f_bavail = buf->f_blocks - (total_used_data >> bits); 1013 buf->f_bsize = dentry->d_sb->s_blocksize; 1014 buf->f_type = BTRFS_SUPER_MAGIC; 1015 1016 /* We treat it as constant endianness (it doesn't matter _which_) 1017 because we want the fsid to come out the same whether mounted ··· 1136 if (err) 1137 return err; 1138 1139 - err = btrfs_init_cachep(); 1140 if (err) 1141 goto free_sysfs; 1142 1143 err = extent_io_init(); 1144 if (err) ··· 1171 extent_io_exit(); 1172 free_cachep: 1173 btrfs_destroy_cachep(); 1174 free_sysfs: 1175 btrfs_exit_sysfs(); 1176 return err; ··· 1187 unregister_filesystem(&btrfs_fs_type); 1188 btrfs_exit_sysfs(); 1189 btrfs_cleanup_fs_uuids(); 1190 - btrfs_zlib_exit(); 1191 } 1192 1193 module_init(init_btrfs_fs)
··· 54 55 static const struct super_operations btrfs_super_ops; 56 57 + static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, 58 + char nbuf[16]) 59 + { 60 + char *errstr = NULL; 61 + 62 + switch (errno) { 63 + case -EIO: 64 + errstr = "IO failure"; 65 + break; 66 + case -ENOMEM: 67 + errstr = "Out of memory"; 68 + break; 69 + case -EROFS: 70 + errstr = "Readonly filesystem"; 71 + break; 72 + default: 73 + if (nbuf) { 74 + if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 75 + errstr = nbuf; 76 + } 77 + break; 78 + } 79 + 80 + return errstr; 81 + } 82 + 83 + static void __save_error_info(struct btrfs_fs_info *fs_info) 84 + { 85 + /* 86 + * today we only save the error info into ram. Long term we'll 87 + * also send it down to the disk 88 + */ 89 + fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; 90 + } 91 + 92 + /* NOTE: 93 + * We move write_super stuff at umount in order to avoid deadlock 94 + * for umount hold all lock. 95 + */ 96 + static void save_error_info(struct btrfs_fs_info *fs_info) 97 + { 98 + __save_error_info(fs_info); 99 + } 100 + 101 + /* btrfs handle error by forcing the filesystem readonly */ 102 + static void btrfs_handle_error(struct btrfs_fs_info *fs_info) 103 + { 104 + struct super_block *sb = fs_info->sb; 105 + 106 + if (sb->s_flags & MS_RDONLY) 107 + return; 108 + 109 + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 110 + sb->s_flags |= MS_RDONLY; 111 + printk(KERN_INFO "btrfs is forced readonly\n"); 112 + } 113 + } 114 + 115 + /* 116 + * __btrfs_std_error decodes expected errors from the caller and 117 + * invokes the approciate error response. 118 + */ 119 + void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 120 + unsigned int line, int errno) 121 + { 122 + struct super_block *sb = fs_info->sb; 123 + char nbuf[16]; 124 + const char *errstr; 125 + 126 + /* 127 + * Special case: if the error is EROFS, and we're already 128 + * under MS_RDONLY, then it is safe here. 
129 + */ 130 + if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) 131 + return; 132 + 133 + errstr = btrfs_decode_error(fs_info, errno, nbuf); 134 + printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", 135 + sb->s_id, function, line, errstr); 136 + save_error_info(fs_info); 137 + 138 + btrfs_handle_error(fs_info); 139 + } 140 + 141 static void btrfs_put_super(struct super_block *sb) 142 { 143 struct btrfs_root *root = btrfs_sb(sb); ··· 69 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 72 + Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 73 + Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 74 + Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 75 }; 76 77 static match_table_t tokens = { ··· 86 {Opt_alloc_start, "alloc_start=%s"}, 87 {Opt_thread_pool, "thread_pool=%d"}, 88 {Opt_compress, "compress"}, 89 + {Opt_compress_type, "compress=%s"}, 90 {Opt_compress_force, "compress-force"}, 91 + {Opt_compress_force_type, "compress-force=%s"}, 92 {Opt_ssd, "ssd"}, 93 {Opt_ssd_spread, "ssd_spread"}, 94 {Opt_nossd, "nossd"}, ··· 112 char *p, *num, *orig; 113 int intarg; 114 int ret = 0; 115 + char *compress_type; 116 + bool compress_force = false; 117 118 if (!options) 119 return 0; ··· 154 btrfs_set_opt(info->mount_opt, NODATACOW); 155 btrfs_set_opt(info->mount_opt, NODATASUM); 156 break; 157 case Opt_compress_force: 158 + case Opt_compress_force_type: 159 + compress_force = true; 160 + case Opt_compress: 161 + case Opt_compress_type: 162 + if (token == Opt_compress || 163 + token == Opt_compress_force || 164 + strcmp(args[0].from, "zlib") == 0) { 165 + compress_type = "zlib"; 166 + info->compress_type = BTRFS_COMPRESS_ZLIB; 167 + } else if (strcmp(args[0].from, "lzo") == 0) { 168 + compress_type = "lzo"; 169 + info->compress_type = BTRFS_COMPRESS_LZO; 170 + } else { 171 + ret = -EINVAL; 172 + goto out; 173 + } 174 + 175 btrfs_set_opt(info->mount_opt, COMPRESS); 176 + if (compress_force) { 177 + btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 178 + pr_info("btrfs: force %s compression\n", 179 + compress_type); 180 + } else 181 + pr_info("btrfs: use %s compression\n", 182 + compress_type); 183 break; 184 case Opt_ssd: 185 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); ··· 753 return 0; 754 } 755 756 + /* 757 + * The helper to calc the free space on the devices that can be used to store 758 + * file data. 
759 + */ 760 + static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) 761 + { 762 + struct btrfs_fs_info *fs_info = root->fs_info; 763 + struct btrfs_device_info *devices_info; 764 + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 765 + struct btrfs_device *device; 766 + u64 skip_space; 767 + u64 type; 768 + u64 avail_space; 769 + u64 used_space; 770 + u64 min_stripe_size; 771 + int min_stripes = 1; 772 + int i = 0, nr_devices; 773 + int ret; 774 + 775 + nr_devices = fs_info->fs_devices->rw_devices; 776 + BUG_ON(!nr_devices); 777 + 778 + devices_info = kmalloc(sizeof(*devices_info) * nr_devices, 779 + GFP_NOFS); 780 + if (!devices_info) 781 + return -ENOMEM; 782 + 783 + /* calc min stripe number for data space alloction */ 784 + type = btrfs_get_alloc_profile(root, 1); 785 + if (type & BTRFS_BLOCK_GROUP_RAID0) 786 + min_stripes = 2; 787 + else if (type & BTRFS_BLOCK_GROUP_RAID1) 788 + min_stripes = 2; 789 + else if (type & BTRFS_BLOCK_GROUP_RAID10) 790 + min_stripes = 4; 791 + 792 + if (type & BTRFS_BLOCK_GROUP_DUP) 793 + min_stripe_size = 2 * BTRFS_STRIPE_LEN; 794 + else 795 + min_stripe_size = BTRFS_STRIPE_LEN; 796 + 797 + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 798 + if (!device->in_fs_metadata) 799 + continue; 800 + 801 + avail_space = device->total_bytes - device->bytes_used; 802 + 803 + /* align with stripe_len */ 804 + do_div(avail_space, BTRFS_STRIPE_LEN); 805 + avail_space *= BTRFS_STRIPE_LEN; 806 + 807 + /* 808 + * In order to avoid overwritting the superblock on the drive, 809 + * btrfs starts at an offset of at least 1MB when doing chunk 810 + * allocation. 811 + */ 812 + skip_space = 1024 * 1024; 813 + 814 + /* user can set the offset in fs_info->alloc_start. */ 815 + if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= 816 + device->total_bytes) 817 + skip_space = max(fs_info->alloc_start, skip_space); 818 + 819 + /* 820 + * btrfs can not use the free space in [0, skip_space - 1], 821 + * we must subtract it from the total. In order to implement 822 + * it, we account the used space in this range first. 823 + */ 824 + ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, 825 + &used_space); 826 + if (ret) { 827 + kfree(devices_info); 828 + return ret; 829 + } 830 + 831 + /* calc the free space in [0, skip_space - 1] */ 832 + skip_space -= used_space; 833 + 834 + /* 835 + * we can use the free space in [0, skip_space - 1], subtract 836 + * it from the total. 
837 + */ 838 + if (avail_space && avail_space >= skip_space) 839 + avail_space -= skip_space; 840 + else 841 + avail_space = 0; 842 + 843 + if (avail_space < min_stripe_size) 844 + continue; 845 + 846 + devices_info[i].dev = device; 847 + devices_info[i].max_avail = avail_space; 848 + 849 + i++; 850 + } 851 + 852 + nr_devices = i; 853 + 854 + btrfs_descending_sort_devices(devices_info, nr_devices); 855 + 856 + i = nr_devices - 1; 857 + avail_space = 0; 858 + while (nr_devices >= min_stripes) { 859 + if (devices_info[i].max_avail >= min_stripe_size) { 860 + int j; 861 + u64 alloc_size; 862 + 863 + avail_space += devices_info[i].max_avail * min_stripes; 864 + alloc_size = devices_info[i].max_avail; 865 + for (j = i + 1 - min_stripes; j <= i; j++) 866 + devices_info[j].max_avail -= alloc_size; 867 + } 868 + i--; 869 + nr_devices--; 870 + } 871 + 872 + kfree(devices_info); 873 + *free_bytes = avail_space; 874 + return 0; 875 + } 876 + 877 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 878 { 879 struct btrfs_root *root = btrfs_sb(dentry->d_sb); ··· 760 struct list_head *head = &root->fs_info->space_info; 761 struct btrfs_space_info *found; 762 u64 total_used = 0; 763 + u64 total_free_data = 0; 764 int bits = dentry->d_sb->s_blocksize_bits; 765 __be32 *fsid = (__be32 *)root->fs_info->fsid; 766 + int ret; 767 768 + /* holding chunk_muext to avoid allocating new chunks */ 769 + mutex_lock(&root->fs_info->chunk_mutex); 770 rcu_read_lock(); 771 list_for_each_entry_rcu(found, head, list) { 772 + if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 773 + total_free_data += found->disk_total - found->disk_used; 774 + total_free_data -= 775 + btrfs_account_ro_block_groups_free_space(found); 776 + } 777 + 778 total_used += found->disk_used; 779 } 780 rcu_read_unlock(); ··· 778 buf->f_namelen = BTRFS_NAME_LEN; 779 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 780 buf->f_bfree = buf->f_blocks - (total_used >> bits); 781 buf->f_bsize = dentry->d_sb->s_blocksize; 782 buf->f_type = BTRFS_SUPER_MAGIC; 783 + buf->f_bavail = total_free_data; 784 + ret = btrfs_calc_avail_data_space(root, &total_free_data); 785 + if (ret) { 786 + mutex_unlock(&root->fs_info->chunk_mutex); 787 + return ret; 788 + } 789 + buf->f_bavail += total_free_data; 790 + buf->f_bavail = buf->f_bavail >> bits; 791 + mutex_unlock(&root->fs_info->chunk_mutex); 792 793 /* We treat it as constant endianness (it doesn't matter _which_) 794 because we want the fsid to come out the same whether mounted ··· 897 if (err) 898 return err; 899 900 + err = btrfs_init_compress(); 901 if (err) 902 goto free_sysfs; 903 + 904 + err = btrfs_init_cachep(); 905 + if (err) 906 + goto free_compress; 907 908 err = extent_io_init(); 909 if (err) ··· 928 extent_io_exit(); 929 free_cachep: 930 btrfs_destroy_cachep(); 931 + free_compress: 932 + btrfs_exit_compress(); 933 free_sysfs: 934 btrfs_exit_sysfs(); 935 return err; ··· 942 unregister_filesystem(&btrfs_fs_type); 943 btrfs_exit_sysfs(); 944 btrfs_cleanup_fs_uuids(); 945 + btrfs_exit_compress(); 946 } 947 948 module_init(init_btrfs_fs)
+11
fs/btrfs/transaction.c
··· 181 struct btrfs_trans_handle *h; 182 struct btrfs_transaction *cur_trans; 183 int ret; 184 again: 185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 186 if (!h) ··· 913 u64 to_reserve = 0; 914 u64 index = 0; 915 u64 objectid; 916 917 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 918 if (!new_root_item) { ··· 970 record_root_in_trans(trans, root); 971 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 972 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 973 974 old = btrfs_lock_root_node(root); 975 btrfs_cow_block(trans, root, old, NULL, 0, &old);
··· 181 struct btrfs_trans_handle *h; 182 struct btrfs_transaction *cur_trans; 183 int ret; 184 + 185 + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 186 + return ERR_PTR(-EROFS); 187 again: 188 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 189 if (!h) ··· 910 u64 to_reserve = 0; 911 u64 index = 0; 912 u64 objectid; 913 + u64 root_flags; 914 915 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 916 if (!new_root_item) { ··· 966 record_root_in_trans(trans, root); 967 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 968 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 969 + 970 + root_flags = btrfs_root_flags(new_root_item); 971 + if (pending->readonly) 972 + root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; 973 + else 974 + root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; 975 + btrfs_set_root_flags(new_root_item, root_flags); 976 977 old = btrfs_lock_root_node(root); 978 btrfs_cow_block(trans, root, old, NULL, 0, &old);
+1
fs/btrfs/transaction.h
··· 62 struct btrfs_block_rsv block_rsv; 63 /* extra metadata reservation for relocation */ 64 int error; 65 struct list_head list; 66 }; 67
··· 62 struct btrfs_block_rsv block_rsv; 63 /* extra metadata reservation for relocation */ 64 int error; 65 + bool readonly; 66 struct list_head list; 67 }; 68
+474 -182
fs/btrfs/volumes.c
··· 22 #include <linux/blkdev.h> 23 #include <linux/random.h> 24 #include <linux/iocontext.h> 25 #include <asm/div64.h> 26 #include "compat.h" 27 #include "ctree.h" ··· 601 set_blocksize(bdev, 4096); 602 603 bh = btrfs_read_dev_super(bdev); 604 - if (!bh) 605 goto error_close; 606 607 disk_super = (struct btrfs_super_block *)bh->b_data; 608 devid = btrfs_stack_device_id(&disk_super->dev_item); ··· 706 goto error_close; 707 bh = btrfs_read_dev_super(bdev); 708 if (!bh) { 709 - ret = -EIO; 710 goto error_close; 711 } 712 disk_super = (struct btrfs_super_block *)bh->b_data; ··· 732 return ret; 733 } 734 735 - /* 736 - * this uses a pretty simple search, the expectation is that it is 737 - * called very infrequently and that a given device has a small number 738 - * of extents 739 - */ 740 - int find_free_dev_extent(struct btrfs_trans_handle *trans, 741 - struct btrfs_device *device, u64 num_bytes, 742 - u64 *start, u64 *max_avail) 743 { 744 struct btrfs_key key; 745 struct btrfs_root *root = device->dev_root; 746 - struct btrfs_dev_extent *dev_extent = NULL; 747 struct btrfs_path *path; 748 - u64 hole_size = 0; 749 - u64 last_byte = 0; 750 - u64 search_start = 0; 751 - u64 search_end = device->total_bytes; 752 int ret; 753 - int slot = 0; 754 - int start_found; 755 struct extent_buffer *l; 756 757 path = btrfs_alloc_path(); 758 if (!path) 759 return -ENOMEM; 760 path->reada = 2; 761 - start_found = 0; 762 - 763 - /* FIXME use last free of some kind */ 764 - 765 - /* we don't want to overwrite the superblock on the drive, 766 - * so we make sure to start at an offset of at least 1MB 767 - */ 768 - search_start = max((u64)1024 * 1024, search_start); 769 - 770 - if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) 771 - search_start = max(root->fs_info->alloc_start, search_start); 772 773 key.objectid = device->devid; 774 - key.offset = search_start; 775 key.type = BTRFS_DEV_EXTENT_KEY; 776 - ret = btrfs_search_slot(trans, root, &key, path, 0, 0); 777 if (ret < 0) 778 - goto error; 779 if (ret > 0) { 780 ret = btrfs_previous_item(root, path, key.objectid, key.type); 781 if (ret < 0) 782 - goto error; 783 - if (ret > 0) 784 - start_found = 1; 785 } 786 - l = path->nodes[0]; 787 - btrfs_item_key_to_cpu(l, &key, path->slots[0]); 788 while (1) { 789 l = path->nodes[0]; 790 slot = path->slots[0]; ··· 776 if (ret == 0) 777 continue; 778 if (ret < 0) 779 - goto error; 780 - no_more_items: 781 - if (!start_found) { 782 - if (search_start >= search_end) { 783 - ret = -ENOSPC; 784 - goto error; 785 - } 786 - *start = search_start; 787 - start_found = 1; 788 - goto check_pending; 789 - } 790 - *start = last_byte > search_start ? 
791 - last_byte : search_start; 792 - if (search_end <= *start) { 793 - ret = -ENOSPC; 794 - goto error; 795 - } 796 - goto check_pending; 797 } 798 btrfs_item_key_to_cpu(l, &key, slot); 799 ··· 786 goto next; 787 788 if (key.objectid > device->devid) 789 - goto no_more_items; 790 791 - if (key.offset >= search_start && key.offset > last_byte && 792 - start_found) { 793 - if (last_byte < search_start) 794 - last_byte = search_start; 795 - hole_size = key.offset - last_byte; 796 - 797 - if (hole_size > *max_avail) 798 - *max_avail = hole_size; 799 - 800 - if (key.offset > last_byte && 801 - hole_size >= num_bytes) { 802 - *start = last_byte; 803 - goto check_pending; 804 - } 805 - } 806 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 807 goto next; 808 809 - start_found = 1; 810 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 811 - last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); 812 next: 813 path->slots[0]++; 814 cond_resched(); 815 } 816 - check_pending: 817 - /* we have to make sure we didn't find an extent that has already 818 - * been allocated by the map tree or the original allocation 819 - */ 820 - BUG_ON(*start < search_start); 821 822 - if (*start + num_bytes > search_end) { 823 - ret = -ENOSPC; 824 - goto error; 825 } 826 - /* check for pending inserts here */ 827 - ret = 0; 828 829 - error: 830 btrfs_free_path(path); 831 return ret; 832 } 833 ··· 1306 set_blocksize(bdev, 4096); 1307 bh = btrfs_read_dev_super(bdev); 1308 if (!bh) { 1309 - ret = -EIO; 1310 goto error_close; 1311 } 1312 disk_super = (struct btrfs_super_block *)bh->b_data; ··· 2026 if (dev_root->fs_info->sb->s_flags & MS_RDONLY) 2027 return -EROFS; 2028 2029 mutex_lock(&dev_root->fs_info->volume_mutex); 2030 dev_root = dev_root->fs_info->dev_root; 2031 ··· 2267 return calc_size * num_stripes; 2268 } 2269 2270 - static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 2271 - struct btrfs_root *extent_root, 2272 - struct map_lookup **map_ret, 2273 - u64 *num_bytes, u64 *stripe_size, 2274 - u64 start, u64 type) 2275 { 2276 - struct btrfs_fs_info *info = extent_root->fs_info; 2277 - struct btrfs_device *device = NULL; 2278 - struct btrfs_fs_devices *fs_devices = info->fs_devices; 2279 - struct list_head *cur; 2280 - struct map_lookup *map = NULL; 2281 - struct extent_map_tree *em_tree; 2282 - struct extent_map *em; 2283 - struct list_head private_devs; 2284 - int min_stripe_size = 1 * 1024 * 1024; 2285 - u64 calc_size = 1024 * 1024 * 1024; 2286 - u64 max_chunk_size = calc_size; 2287 - u64 min_free; 2288 - u64 avail; 2289 - u64 max_avail = 0; 2290 - u64 dev_offset; 2291 - int num_stripes = 1; 2292 - int min_stripes = 1; 2293 - int sub_stripes = 0; 2294 - int looped = 0; 2295 - int ret; 2296 - int index; 2297 - int stripe_len = 64 * 1024; 2298 2299 - if ((type & BTRFS_BLOCK_GROUP_RAID1) && 2300 - (type & BTRFS_BLOCK_GROUP_DUP)) { 2301 - WARN_ON(1); 2302 - type &= ~BTRFS_BLOCK_GROUP_DUP; 2303 - } 2304 - if (list_empty(&fs_devices->alloc_list)) 2305 - return -ENOSPC; 2306 2307 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2308 - num_stripes = fs_devices->rw_devices; 2309 - min_stripes = 2; 2310 } 2311 if (type & (BTRFS_BLOCK_GROUP_DUP)) { 2312 - num_stripes = 2; 2313 - min_stripes = 2; 2314 } 2315 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 2316 if (fs_devices->rw_devices < 2) 2317 return -ENOSPC; 2318 - num_stripes = 2; 2319 - min_stripes = 2; 2320 } 2321 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2322 - num_stripes = fs_devices->rw_devices; 2323 - if (num_stripes < 4) 2324 return -ENOSPC; 
2325 - num_stripes &= ~(u32)1; 2326 - sub_stripes = 2; 2327 - min_stripes = 4; 2328 } 2329 2330 if (type & BTRFS_BLOCK_GROUP_DATA) { 2331 max_chunk_size = 10 * calc_size; ··· 2344 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 2345 max_chunk_size); 2346 2347 - again: 2348 - max_avail = 0; 2349 - if (!map || map->num_stripes != num_stripes) { 2350 - kfree(map); 2351 - map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 2352 - if (!map) 2353 - return -ENOMEM; 2354 - map->num_stripes = num_stripes; 2355 - } 2356 - 2357 - if (calc_size * num_stripes > max_chunk_size) { 2358 - calc_size = max_chunk_size; 2359 do_div(calc_size, num_stripes); 2360 - do_div(calc_size, stripe_len); 2361 - calc_size *= stripe_len; 2362 } 2363 2364 /* we don't want tiny stripes */ 2365 - if (!looped) 2366 calc_size = max_t(u64, min_stripe_size, calc_size); 2367 2368 /* 2369 - * we're about to do_div by the stripe_len so lets make sure 2370 * we end up with something bigger than a stripe 2371 */ 2372 - calc_size = max_t(u64, calc_size, stripe_len * 4); 2373 2374 - do_div(calc_size, stripe_len); 2375 - calc_size *= stripe_len; 2376 2377 cur = fs_devices->alloc_list.next; 2378 index = 0; 2379 2380 - if (type & BTRFS_BLOCK_GROUP_DUP) 2381 min_free = calc_size * 2; 2382 - else 2383 min_free = calc_size; 2384 - 2385 - /* 2386 - * we add 1MB because we never use the first 1MB of the device, unless 2387 - * we've looped, then we are likely allocating the maximum amount of 2388 - * space left already 2389 - */ 2390 - if (!looped) 2391 - min_free += 1024 * 1024; 2392 2393 INIT_LIST_HEAD(&private_devs); 2394 while (index < num_stripes) { ··· 2559 cur = cur->next; 2560 2561 if (device->in_fs_metadata && avail >= min_free) { 2562 - ret = find_free_dev_extent(trans, device, 2563 - min_free, &dev_offset, 2564 - &max_avail); 2565 if (ret == 0) { 2566 list_move_tail(&device->dev_alloc_list, 2567 &private_devs); 2568 map->stripes[index].dev = device; 2569 - map->stripes[index].physical = dev_offset; 2570 index++; 2571 if (type & BTRFS_BLOCK_GROUP_DUP) { 2572 map->stripes[index].dev = device; 2573 map->stripes[index].physical = 2574 - dev_offset + calc_size; 2575 index++; 2576 } 2577 - } 2578 - } else if (device->in_fs_metadata && avail > max_avail) 2579 - max_avail = avail; 2580 if (cur == &fs_devices->alloc_list) 2581 break; 2582 } 2583 list_splice(&private_devs, &fs_devices->alloc_list); 2584 if (index < num_stripes) { 2585 if (index >= min_stripes) { ··· 2600 num_stripes /= sub_stripes; 2601 num_stripes *= sub_stripes; 2602 } 2603 - looped = 1; 2604 - goto again; 2605 } 2606 - if (!looped && max_avail > 0) { 2607 - looped = 1; 2608 - calc_size = max_avail; 2609 - goto again; 2610 - } 2611 - kfree(map); 2612 - return -ENOSPC; 2613 } 2614 map->sector_size = extent_root->sectorsize; 2615 - map->stripe_len = stripe_len; 2616 - map->io_align = stripe_len; 2617 - map->io_width = stripe_len; 2618 map->type = type; 2619 - map->num_stripes = num_stripes; 2620 map->sub_stripes = sub_stripes; 2621 2622 *map_ret = map; 2623 *stripe_size = calc_size; 2624 *num_bytes = chunk_bytes_by_type(type, calc_size, 2625 - num_stripes, sub_stripes); 2626 2627 em = alloc_extent_map(GFP_NOFS); 2628 if (!em) { 2629 - kfree(map); 2630 - return -ENOMEM; 2631 } 2632 em->bdev = (struct block_device *)map; 2633 em->start = start; ··· 2662 index++; 2663 } 2664 2665 return 0; 2666 } 2667 2668 static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
··· 22 #include <linux/blkdev.h> 23 #include <linux/random.h> 24 #include <linux/iocontext.h> 25 + #include <linux/capability.h> 26 #include <asm/div64.h> 27 #include "compat.h" 28 #include "ctree.h" ··· 600 set_blocksize(bdev, 4096); 601 602 bh = btrfs_read_dev_super(bdev); 603 + if (!bh) { 604 + ret = -EINVAL; 605 goto error_close; 606 + } 607 608 disk_super = (struct btrfs_super_block *)bh->b_data; 609 devid = btrfs_stack_device_id(&disk_super->dev_item); ··· 703 goto error_close; 704 bh = btrfs_read_dev_super(bdev); 705 if (!bh) { 706 + ret = -EINVAL; 707 goto error_close; 708 } 709 disk_super = (struct btrfs_super_block *)bh->b_data; ··· 729 return ret; 730 } 731 732 + /* helper to account the used device space in the range */ 733 + int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 734 + u64 end, u64 *length) 735 { 736 struct btrfs_key key; 737 struct btrfs_root *root = device->dev_root; 738 + struct btrfs_dev_extent *dev_extent; 739 struct btrfs_path *path; 740 + u64 extent_end; 741 int ret; 742 + int slot; 743 struct extent_buffer *l; 744 + 745 + *length = 0; 746 + 747 + if (start >= device->total_bytes) 748 + return 0; 749 750 path = btrfs_alloc_path(); 751 if (!path) 752 return -ENOMEM; 753 path->reada = 2; 754 755 key.objectid = device->devid; 756 + key.offset = start; 757 key.type = BTRFS_DEV_EXTENT_KEY; 758 + 759 + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 760 if (ret < 0) 761 + goto out; 762 if (ret > 0) { 763 ret = btrfs_previous_item(root, path, key.objectid, key.type); 764 if (ret < 0) 765 + goto out; 766 } 767 + 768 while (1) { 769 l = path->nodes[0]; 770 slot = path->slots[0]; ··· 790 if (ret == 0) 791 continue; 792 if (ret < 0) 793 + goto out; 794 + 795 + break; 796 } 797 btrfs_item_key_to_cpu(l, &key, slot); 798 ··· 815 goto next; 816 817 if (key.objectid > device->devid) 818 + break; 819 820 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 821 goto next; 822 823 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 824 + extent_end = key.offset + btrfs_dev_extent_length(l, 825 + dev_extent); 826 + if (key.offset <= start && extent_end > end) { 827 + *length = end - start + 1; 828 + break; 829 + } else if (key.offset <= start && extent_end > start) 830 + *length += extent_end - start; 831 + else if (key.offset > start && extent_end <= end) 832 + *length += extent_end - key.offset; 833 + else if (key.offset > start && key.offset <= end) { 834 + *length += end - key.offset + 1; 835 + break; 836 + } else if (key.offset > end) 837 + break; 838 + 839 + next: 840 + path->slots[0]++; 841 + } 842 + ret = 0; 843 + out: 844 + btrfs_free_path(path); 845 + return ret; 846 + } 847 + 848 + /* 849 + * find_free_dev_extent - find free space in the specified device 850 + * @trans: transaction handler 851 + * @device: the device which we search the free space in 852 + * @num_bytes: the size of the free space that we need 853 + * @start: store the start of the free space. 854 + * @len: the size of the free space. that we find, or the size of the max 855 + * free space if we don't find suitable free space 856 + * 857 + * this uses a pretty simple search, the expectation is that it is 858 + * called very infrequently and that a given device has a small number 859 + * of extents 860 + * 861 + * @start is used to store the start of the free space if we find. But if we 862 + * don't find suitable free space, it will be used to store the start position 863 + * of the max free space. 
864 + * 865 + * @len is used to store the size of the free space that we find. 866 + * But if we don't find suitable free space, it is used to store the size of 867 + * the max free space. 868 + */ 869 + int find_free_dev_extent(struct btrfs_trans_handle *trans, 870 + struct btrfs_device *device, u64 num_bytes, 871 + u64 *start, u64 *len) 872 + { 873 + struct btrfs_key key; 874 + struct btrfs_root *root = device->dev_root; 875 + struct btrfs_dev_extent *dev_extent; 876 + struct btrfs_path *path; 877 + u64 hole_size; 878 + u64 max_hole_start; 879 + u64 max_hole_size; 880 + u64 extent_end; 881 + u64 search_start; 882 + u64 search_end = device->total_bytes; 883 + int ret; 884 + int slot; 885 + struct extent_buffer *l; 886 + 887 + /* FIXME use last free of some kind */ 888 + 889 + /* we don't want to overwrite the superblock on the drive, 890 + * so we make sure to start at an offset of at least 1MB 891 + */ 892 + search_start = 1024 * 1024; 893 + 894 + if (root->fs_info->alloc_start + num_bytes <= search_end) 895 + search_start = max(root->fs_info->alloc_start, search_start); 896 + 897 + max_hole_start = search_start; 898 + max_hole_size = 0; 899 + 900 + if (search_start >= search_end) { 901 + ret = -ENOSPC; 902 + goto error; 903 + } 904 + 905 + path = btrfs_alloc_path(); 906 + if (!path) { 907 + ret = -ENOMEM; 908 + goto error; 909 + } 910 + path->reada = 2; 911 + 912 + key.objectid = device->devid; 913 + key.offset = search_start; 914 + key.type = BTRFS_DEV_EXTENT_KEY; 915 + 916 + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); 917 + if (ret < 0) 918 + goto out; 919 + if (ret > 0) { 920 + ret = btrfs_previous_item(root, path, key.objectid, key.type); 921 + if (ret < 0) 922 + goto out; 923 + } 924 + 925 + while (1) { 926 + l = path->nodes[0]; 927 + slot = path->slots[0]; 928 + if (slot >= btrfs_header_nritems(l)) { 929 + ret = btrfs_next_leaf(root, path); 930 + if (ret == 0) 931 + continue; 932 + if (ret < 0) 933 + goto out; 934 + 935 + break; 936 + } 937 + btrfs_item_key_to_cpu(l, &key, slot); 938 + 939 + if (key.objectid < device->devid) 940 + goto next; 941 + 942 + if (key.objectid > device->devid) 943 + break; 944 + 945 + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 946 + goto next; 947 + 948 + if (key.offset > search_start) { 949 + hole_size = key.offset - search_start; 950 + 951 + if (hole_size > max_hole_size) { 952 + max_hole_start = search_start; 953 + max_hole_size = hole_size; 954 + } 955 + 956 + /* 957 + * If this free space is greater than which we need, 958 + * it must be the max free space that we have found 959 + * until now, so max_hole_start must point to the start 960 + * of this free space and the length of this free space 961 + * is stored in max_hole_size. Thus, we return 962 + * max_hole_start and max_hole_size and go back to the 963 + * caller. 964 + */ 965 + if (hole_size >= num_bytes) { 966 + ret = 0; 967 + goto out; 968 + } 969 + } 970 + 971 + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 972 + extent_end = key.offset + btrfs_dev_extent_length(l, 973 + dev_extent); 974 + if (extent_end > search_start) 975 + search_start = extent_end; 976 next: 977 path->slots[0]++; 978 cond_resched(); 979 } 980 981 + hole_size = search_end- search_start; 982 + if (hole_size > max_hole_size) { 983 + max_hole_start = search_start; 984 + max_hole_size = hole_size; 985 } 986 987 + /* See above. 
*/ 988 + if (hole_size < num_bytes) 989 + ret = -ENOSPC; 990 + else 991 + ret = 0; 992 + 993 + out: 994 btrfs_free_path(path); 995 + error: 996 + *start = max_hole_start; 997 + if (len) 998 + *len = max_hole_size; 999 return ret; 1000 } 1001 ··· 1196 set_blocksize(bdev, 4096); 1197 bh = btrfs_read_dev_super(bdev); 1198 if (!bh) { 1199 + ret = -EINVAL; 1200 goto error_close; 1201 } 1202 disk_super = (struct btrfs_super_block *)bh->b_data; ··· 1916 if (dev_root->fs_info->sb->s_flags & MS_RDONLY) 1917 return -EROFS; 1918 1919 + if (!capable(CAP_SYS_ADMIN)) 1920 + return -EPERM; 1921 + 1922 mutex_lock(&dev_root->fs_info->volume_mutex); 1923 dev_root = dev_root->fs_info->dev_root; 1924 ··· 2154 return calc_size * num_stripes; 2155 } 2156 2157 + /* Used to sort the devices by max_avail(descending sort) */ 2158 + int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) 2159 { 2160 + if (((struct btrfs_device_info *)dev_info1)->max_avail > 2161 + ((struct btrfs_device_info *)dev_info2)->max_avail) 2162 + return -1; 2163 + else if (((struct btrfs_device_info *)dev_info1)->max_avail < 2164 + ((struct btrfs_device_info *)dev_info2)->max_avail) 2165 + return 1; 2166 + else 2167 + return 0; 2168 + } 2169 2170 + static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, 2171 + int *num_stripes, int *min_stripes, 2172 + int *sub_stripes) 2173 + { 2174 + *num_stripes = 1; 2175 + *min_stripes = 1; 2176 + *sub_stripes = 0; 2177 2178 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2179 + *num_stripes = fs_devices->rw_devices; 2180 + *min_stripes = 2; 2181 } 2182 if (type & (BTRFS_BLOCK_GROUP_DUP)) { 2183 + *num_stripes = 2; 2184 + *min_stripes = 2; 2185 } 2186 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 2187 if (fs_devices->rw_devices < 2) 2188 return -ENOSPC; 2189 + *num_stripes = 2; 2190 + *min_stripes = 2; 2191 } 2192 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2193 + *num_stripes = fs_devices->rw_devices; 2194 + if (*num_stripes < 4) 2195 return -ENOSPC; 2196 + *num_stripes &= ~(u32)1; 2197 + *sub_stripes = 2; 2198 + *min_stripes = 4; 2199 } 2200 + 2201 + return 0; 2202 + } 2203 + 2204 + static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, 2205 + u64 proposed_size, u64 type, 2206 + int num_stripes, int small_stripe) 2207 + { 2208 + int min_stripe_size = 1 * 1024 * 1024; 2209 + u64 calc_size = proposed_size; 2210 + u64 max_chunk_size = calc_size; 2211 + int ncopies = 1; 2212 + 2213 + if (type & (BTRFS_BLOCK_GROUP_RAID1 | 2214 + BTRFS_BLOCK_GROUP_DUP | 2215 + BTRFS_BLOCK_GROUP_RAID10)) 2216 + ncopies = 2; 2217 2218 if (type & BTRFS_BLOCK_GROUP_DATA) { 2219 max_chunk_size = 10 * calc_size; ··· 2230 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 2231 max_chunk_size); 2232 2233 + if (calc_size * num_stripes > max_chunk_size * ncopies) { 2234 + calc_size = max_chunk_size * ncopies; 2235 do_div(calc_size, num_stripes); 2236 + do_div(calc_size, BTRFS_STRIPE_LEN); 2237 + calc_size *= BTRFS_STRIPE_LEN; 2238 } 2239 2240 /* we don't want tiny stripes */ 2241 + if (!small_stripe) 2242 calc_size = max_t(u64, min_stripe_size, calc_size); 2243 2244 /* 2245 + * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure 2246 * we end up with something bigger than a stripe 2247 */ 2248 + calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); 2249 2250 + do_div(calc_size, BTRFS_STRIPE_LEN); 2251 + calc_size *= BTRFS_STRIPE_LEN; 2252 + 2253 + return calc_size; 2254 + } 2255 + 2256 + static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, 
2257 + int num_stripes) 2258 + { 2259 + struct map_lookup *new; 2260 + size_t len = map_lookup_size(num_stripes); 2261 + 2262 + BUG_ON(map->num_stripes < num_stripes); 2263 + 2264 + if (map->num_stripes == num_stripes) 2265 + return map; 2266 + 2267 + new = kmalloc(len, GFP_NOFS); 2268 + if (!new) { 2269 + /* just change map->num_stripes */ 2270 + map->num_stripes = num_stripes; 2271 + return map; 2272 + } 2273 + 2274 + memcpy(new, map, len); 2275 + new->num_stripes = num_stripes; 2276 + kfree(map); 2277 + return new; 2278 + } 2279 + 2280 + /* 2281 + * helper to allocate device space from btrfs_device_info, in which we stored 2282 + * max free space information of every device. It is used when we can not 2283 + * allocate chunks by default size. 2284 + * 2285 + * By this helper, we can allocate a new chunk as larger as possible. 2286 + */ 2287 + static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, 2288 + struct btrfs_fs_devices *fs_devices, 2289 + struct btrfs_device_info *devices, 2290 + int nr_device, u64 type, 2291 + struct map_lookup **map_lookup, 2292 + int min_stripes, u64 *stripe_size) 2293 + { 2294 + int i, index, sort_again = 0; 2295 + int min_devices = min_stripes; 2296 + u64 max_avail, min_free; 2297 + struct map_lookup *map = *map_lookup; 2298 + int ret; 2299 + 2300 + if (nr_device < min_stripes) 2301 + return -ENOSPC; 2302 + 2303 + btrfs_descending_sort_devices(devices, nr_device); 2304 + 2305 + max_avail = devices[0].max_avail; 2306 + if (!max_avail) 2307 + return -ENOSPC; 2308 + 2309 + for (i = 0; i < nr_device; i++) { 2310 + /* 2311 + * if dev_offset = 0, it means the free space of this device 2312 + * is less than what we need, and we didn't search max avail 2313 + * extent on this device, so do it now. 2314 + */ 2315 + if (!devices[i].dev_offset) { 2316 + ret = find_free_dev_extent(trans, devices[i].dev, 2317 + max_avail, 2318 + &devices[i].dev_offset, 2319 + &devices[i].max_avail); 2320 + if (ret != 0 && ret != -ENOSPC) 2321 + return ret; 2322 + sort_again = 1; 2323 + } 2324 + } 2325 + 2326 + /* we update the max avail free extent of each devices, sort again */ 2327 + if (sort_again) 2328 + btrfs_descending_sort_devices(devices, nr_device); 2329 + 2330 + if (type & BTRFS_BLOCK_GROUP_DUP) 2331 + min_devices = 1; 2332 + 2333 + if (!devices[min_devices - 1].max_avail) 2334 + return -ENOSPC; 2335 + 2336 + max_avail = devices[min_devices - 1].max_avail; 2337 + if (type & BTRFS_BLOCK_GROUP_DUP) 2338 + do_div(max_avail, 2); 2339 + 2340 + max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, 2341 + min_stripes, 1); 2342 + if (type & BTRFS_BLOCK_GROUP_DUP) 2343 + min_free = max_avail * 2; 2344 + else 2345 + min_free = max_avail; 2346 + 2347 + if (min_free > devices[min_devices - 1].max_avail) 2348 + return -ENOSPC; 2349 + 2350 + map = __shrink_map_lookup_stripes(map, min_stripes); 2351 + *stripe_size = max_avail; 2352 + 2353 + index = 0; 2354 + for (i = 0; i < min_stripes; i++) { 2355 + map->stripes[i].dev = devices[index].dev; 2356 + map->stripes[i].physical = devices[index].dev_offset; 2357 + if (type & BTRFS_BLOCK_GROUP_DUP) { 2358 + i++; 2359 + map->stripes[i].dev = devices[index].dev; 2360 + map->stripes[i].physical = devices[index].dev_offset + 2361 + max_avail; 2362 + } 2363 + index++; 2364 + } 2365 + *map_lookup = map; 2366 + 2367 + return 0; 2368 + } 2369 + 2370 + static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 2371 + struct btrfs_root *extent_root, 2372 + struct map_lookup **map_ret, 2373 + u64 *num_bytes, u64 *stripe_size, 
2374 + u64 start, u64 type) 2375 + { 2376 + struct btrfs_fs_info *info = extent_root->fs_info; 2377 + struct btrfs_device *device = NULL; 2378 + struct btrfs_fs_devices *fs_devices = info->fs_devices; 2379 + struct list_head *cur; 2380 + struct map_lookup *map; 2381 + struct extent_map_tree *em_tree; 2382 + struct extent_map *em; 2383 + struct btrfs_device_info *devices_info; 2384 + struct list_head private_devs; 2385 + u64 calc_size = 1024 * 1024 * 1024; 2386 + u64 min_free; 2387 + u64 avail; 2388 + u64 dev_offset; 2389 + int num_stripes; 2390 + int min_stripes; 2391 + int sub_stripes; 2392 + int min_devices; /* the min number of devices we need */ 2393 + int i; 2394 + int ret; 2395 + int index; 2396 + 2397 + if ((type & BTRFS_BLOCK_GROUP_RAID1) && 2398 + (type & BTRFS_BLOCK_GROUP_DUP)) { 2399 + WARN_ON(1); 2400 + type &= ~BTRFS_BLOCK_GROUP_DUP; 2401 + } 2402 + if (list_empty(&fs_devices->alloc_list)) 2403 + return -ENOSPC; 2404 + 2405 + ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, 2406 + &min_stripes, &sub_stripes); 2407 + if (ret) 2408 + return ret; 2409 + 2410 + devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, 2411 + GFP_NOFS); 2412 + if (!devices_info) 2413 + return -ENOMEM; 2414 + 2415 + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 2416 + if (!map) { 2417 + ret = -ENOMEM; 2418 + goto error; 2419 + } 2420 + map->num_stripes = num_stripes; 2421 2422 cur = fs_devices->alloc_list.next; 2423 index = 0; 2424 + i = 0; 2425 2426 + calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, 2427 + num_stripes, 0); 2428 + 2429 + if (type & BTRFS_BLOCK_GROUP_DUP) { 2430 min_free = calc_size * 2; 2431 + min_devices = 1; 2432 + } else { 2433 min_free = calc_size; 2434 + min_devices = min_stripes; 2435 + } 2436 2437 INIT_LIST_HEAD(&private_devs); 2438 while (index < num_stripes) { ··· 2287 cur = cur->next; 2288 2289 if (device->in_fs_metadata && avail >= min_free) { 2290 + ret = find_free_dev_extent(trans, device, min_free, 2291 + &devices_info[i].dev_offset, 2292 + &devices_info[i].max_avail); 2293 if (ret == 0) { 2294 list_move_tail(&device->dev_alloc_list, 2295 &private_devs); 2296 map->stripes[index].dev = device; 2297 + map->stripes[index].physical = 2298 + devices_info[i].dev_offset; 2299 index++; 2300 if (type & BTRFS_BLOCK_GROUP_DUP) { 2301 map->stripes[index].dev = device; 2302 map->stripes[index].physical = 2303 + devices_info[i].dev_offset + 2304 + calc_size; 2305 index++; 2306 } 2307 + } else if (ret != -ENOSPC) 2308 + goto error; 2309 + 2310 + devices_info[i].dev = device; 2311 + i++; 2312 + } else if (device->in_fs_metadata && 2313 + avail >= BTRFS_STRIPE_LEN) { 2314 + devices_info[i].dev = device; 2315 + devices_info[i].max_avail = avail; 2316 + i++; 2317 + } 2318 + 2319 if (cur == &fs_devices->alloc_list) 2320 break; 2321 } 2322 + 2323 list_splice(&private_devs, &fs_devices->alloc_list); 2324 if (index < num_stripes) { 2325 if (index >= min_stripes) { ··· 2316 num_stripes /= sub_stripes; 2317 num_stripes *= sub_stripes; 2318 } 2319 + 2320 + map = __shrink_map_lookup_stripes(map, num_stripes); 2321 + } else if (i >= min_devices) { 2322 + ret = __btrfs_alloc_tiny_space(trans, fs_devices, 2323 + devices_info, i, type, 2324 + &map, min_stripes, 2325 + &calc_size); 2326 + if (ret) 2327 + goto error; 2328 + } else { 2329 + ret = -ENOSPC; 2330 + goto error; 2331 } 2332 } 2333 map->sector_size = extent_root->sectorsize; 2334 + map->stripe_len = BTRFS_STRIPE_LEN; 2335 + map->io_align = BTRFS_STRIPE_LEN; 2336 + map->io_width = 
BTRFS_STRIPE_LEN; 2337 map->type = type; 2338 map->sub_stripes = sub_stripes; 2339 2340 *map_ret = map; 2341 *stripe_size = calc_size; 2342 *num_bytes = chunk_bytes_by_type(type, calc_size, 2343 + map->num_stripes, sub_stripes); 2344 2345 em = alloc_extent_map(GFP_NOFS); 2346 if (!em) { 2347 + ret = -ENOMEM; 2348 + goto error; 2349 } 2350 em->bdev = (struct block_device *)map; 2351 em->start = start; ··· 2376 index++; 2377 } 2378 2379 + kfree(devices_info); 2380 return 0; 2381 + 2382 + error: 2383 + kfree(map); 2384 + kfree(devices_info); 2385 + return ret; 2386 } 2387 2388 static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
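A note on the rewritten allocator above: the new find_free_dev_extent() walks a device's DEV_EXTENT items in offset order, tracks the largest gap between them, and reports that gap's start and size through *start/*len even when it returns -ENOSPC, which is what lets __btrfs_alloc_tiny_space() retry with a smaller chunk instead of failing outright. Below is a minimal userspace sketch of that hole scan; the ext[] array stands in for the btree walk and every name in the sketch is illustrative rather than taken from the kernel sources.

#include <stdio.h>

struct fake_extent {
        unsigned long long offset;      /* start of an allocated dev extent */
        unsigned long long length;
};

/* ext[] must be sorted by offset, like the DEV_EXTENT items are */
static int find_hole(const struct fake_extent *ext, int nr,
                     unsigned long long dev_size,
                     unsigned long long num_bytes,
                     unsigned long long *start, unsigned long long *len)
{
        unsigned long long search_start = 1024 * 1024; /* keep the first 1MB free */
        unsigned long long max_hole_start = search_start;
        unsigned long long max_hole_size = 0;
        unsigned long long hole, extent_end;
        int i;

        for (i = 0; i < nr; i++) {
                if (ext[i].offset > search_start) {
                        hole = ext[i].offset - search_start;
                        if (hole > max_hole_size) {
                                max_hole_start = search_start;
                                max_hole_size = hole;
                        }
                        if (hole >= num_bytes) /* big enough, stop early */
                                goto out;
                }
                extent_end = ext[i].offset + ext[i].length;
                if (extent_end > search_start)
                        search_start = extent_end;
        }

        /* the gap between the last extent and the end of the device */
        hole = dev_size - search_start;
        if (hole > max_hole_size) {
                max_hole_start = search_start;
                max_hole_size = hole;
        }
out:
        *start = max_hole_start;
        *len = max_hole_size;
        return max_hole_size >= num_bytes ? 0 : -1; /* the kernel uses -ENOSPC */
}

int main(void)
{
        struct fake_extent ext[] = {
                { 1ULL << 20, 8ULL << 20 },   /* 1MB..9MB allocated */
                { 16ULL << 20, 4ULL << 20 },  /* 16MB..20MB allocated */
        };
        unsigned long long start, len;

        if (!find_hole(ext, 2, 64ULL << 20, 2ULL << 20, &start, &len))
                printf("free extent at %llu, max hole %llu\n", start, len);
        return 0;
}

Run against a 64MB device this prints the 7MB hole that starts right after the first extent, which mirrors what the kernel helper reports back to the chunk allocator.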
+27
fs/btrfs/volumes.h
··· 20 #define __BTRFS_VOLUMES_ 21 22 #include <linux/bio.h> 23 #include "async-thread.h" 24 25 struct buffer_head; 26 struct btrfs_pending_bios { ··· 138 int num_stripes; 139 struct btrfs_bio_stripe stripes[]; 140 }; 141 142 #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ 143 (sizeof(struct btrfs_bio_stripe) * (n)))
··· 20 #define __BTRFS_VOLUMES_ 21 22 #include <linux/bio.h> 23 + #include <linux/sort.h> 24 #include "async-thread.h" 25 + 26 + #define BTRFS_STRIPE_LEN (64 * 1024) 27 28 struct buffer_head; 29 struct btrfs_pending_bios { ··· 135 int num_stripes; 136 struct btrfs_bio_stripe stripes[]; 137 }; 138 + 139 + struct btrfs_device_info { 140 + struct btrfs_device *dev; 141 + u64 dev_offset; 142 + u64 max_avail; 143 + }; 144 + 145 + /* Used to sort the devices by max_avail(descending sort) */ 146 + int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); 147 + 148 + /* 149 + * sort the devices by max_avail, in which max free extent size of each device 150 + * is stored.(Descending Sort) 151 + */ 152 + static inline void btrfs_descending_sort_devices( 153 + struct btrfs_device_info *devices, 154 + size_t nr_devices) 155 + { 156 + sort(devices, nr_devices, sizeof(struct btrfs_device_info), 157 + btrfs_cmp_device_free_bytes, NULL); 158 + } 159 + 160 + int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 161 + u64 end, u64 *length); 162 163 #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ 164 (sizeof(struct btrfs_bio_stripe) * (n)))
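volumes.h now also carries btrfs_device_info and a descending sort over max_avail: btrfs_descending_sort_devices() is just the kernel's sort() applied with btrfs_cmp_device_free_bytes(). The comparator's behaviour is easy to check in userspace with qsort(), which expects the same three-way contract; the snippet below is that stand-in, with a stripped-down struct, and none of its names come from the kernel.

#include <stdio.h>
#include <stdlib.h>

struct dev_info {
        unsigned long long max_avail;   /* stand-in for btrfs_device_info */
};

/* same shape as btrfs_cmp_device_free_bytes(): biggest free space first */
static int cmp_free_bytes_desc(const void *a, const void *b)
{
        const struct dev_info *d1 = a, *d2 = b;

        if (d1->max_avail > d2->max_avail)
                return -1;
        if (d1->max_avail < d2->max_avail)
                return 1;
        return 0;
}

int main(void)
{
        struct dev_info devs[] = { { 10 }, { 40 }, { 25 } };
        int i;

        qsort(devs, 3, sizeof(devs[0]), cmp_free_bytes_desc);
        for (i = 0; i < 3; i++)
                printf("%llu\n", devs[i].max_avail);    /* prints 40, 25, 10 */
        return 0;
}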
+18
fs/btrfs/xattr.c
··· 316 int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, 317 size_t size, int flags) 318 { 319 /* 320 * If this is a request for a synthetic attribute in the system.* 321 * namespace use the generic infrastructure to resolve a handler ··· 345 346 int btrfs_removexattr(struct dentry *dentry, const char *name) 347 { 348 /* 349 * If this is a request for a synthetic attribute in the system.* 350 * namespace use the generic infrastructure to resolve a handler
··· 316 int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, 317 size_t size, int flags) 318 { 319 + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; 320 + 321 + /* 322 + * The permission on security.* and system.* is not checked 323 + * in permission(). 324 + */ 325 + if (btrfs_root_readonly(root)) 326 + return -EROFS; 327 + 328 /* 329 * If this is a request for a synthetic attribute in the system.* 330 * namespace use the generic infrastructure to resolve a handler ··· 336 337 int btrfs_removexattr(struct dentry *dentry, const char *name) 338 { 339 + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; 340 + 341 + /* 342 + * The permission on security.* and system.* is not checked 343 + * in permission(). 344 + */ 345 + if (btrfs_root_readonly(root)) 346 + return -EROFS; 347 + 348 /* 349 * If this is a request for a synthetic attribute in the system.* 350 * namespace use the generic infrastructure to resolve a handler
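The xattr hunks are a companion to the readonly-snapshots work in this series: permission() does not gate writes to the security.* and system.* namespaces, so btrfs_setxattr() and btrfs_removexattr() have to refuse them on a read-only subvolume themselves. The sketch below restates that pattern in userspace terms; the struct, the flag and the function names are made up for illustration, and only the -EROFS behaviour mirrors the hunks above.

#include <errno.h>
#include <stdio.h>

struct root {
        unsigned long long flags;
};
#define ROOT_RDONLY 0x1ULL              /* illustrative flag, not the kernel's */

static int root_readonly(const struct root *r)
{
        return (r->flags & ROOT_RDONLY) != 0;
}

static int set_xattr(struct root *r, const char *name, const char *value)
{
        if (root_readonly(r))
                return -EROFS;          /* mirrors the new btrfs_setxattr() check */
        printf("set %s=%s\n", name, value);
        return 0;
}

int main(void)
{
        struct root snap = { .flags = ROOT_RDONLY };

        if (set_xattr(&snap, "user.test", "1") == -EROFS)
                printf("read-only snapshot: xattr write rejected\n");
        return 0;
}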
+71 -300
fs/btrfs/zlib.c
··· 32 #include <linux/bio.h> 33 #include "compression.h" 34 35 - /* Plan: call deflate() with avail_in == *sourcelen, 36 - avail_out = *dstlen - 12 and flush == Z_FINISH. 37 - If it doesn't manage to finish, call it again with 38 - avail_in == 0 and avail_out set to the remaining 12 39 - bytes for it to clean up. 40 - Q: Is 12 bytes sufficient? 41 - */ 42 - #define STREAM_END_SPACE 12 43 - 44 struct workspace { 45 z_stream inf_strm; 46 z_stream def_strm; ··· 39 struct list_head list; 40 }; 41 42 - static LIST_HEAD(idle_workspace); 43 - static DEFINE_SPINLOCK(workspace_lock); 44 - static unsigned long num_workspace; 45 - static atomic_t alloc_workspace = ATOMIC_INIT(0); 46 - static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); 47 - 48 - /* 49 - * this finds an available zlib workspace or allocates a new one 50 - * NULL or an ERR_PTR is returned if things go bad. 51 - */ 52 - static struct workspace *find_zlib_workspace(void) 53 { 54 - struct workspace *workspace; 55 - int ret; 56 - int cpus = num_online_cpus(); 57 58 - again: 59 - spin_lock(&workspace_lock); 60 - if (!list_empty(&idle_workspace)) { 61 - workspace = list_entry(idle_workspace.next, struct workspace, 62 - list); 63 - list_del(&workspace->list); 64 - num_workspace--; 65 - spin_unlock(&workspace_lock); 66 - return workspace; 67 - 68 - } 69 - spin_unlock(&workspace_lock); 70 - if (atomic_read(&alloc_workspace) > cpus) { 71 - DEFINE_WAIT(wait); 72 - prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); 73 - if (atomic_read(&alloc_workspace) > cpus) 74 - schedule(); 75 - finish_wait(&workspace_wait, &wait); 76 - goto again; 77 - } 78 - atomic_inc(&alloc_workspace); 79 - workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 80 - if (!workspace) { 81 - ret = -ENOMEM; 82 - goto fail; 83 - } 84 - 85 - workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 86 - if (!workspace->def_strm.workspace) { 87 - ret = -ENOMEM; 88 - goto fail; 89 - } 90 - workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 91 - if (!workspace->inf_strm.workspace) { 92 - ret = -ENOMEM; 93 - goto fail_inflate; 94 - } 95 - workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 96 - if (!workspace->buf) { 97 - ret = -ENOMEM; 98 - goto fail_kmalloc; 99 - } 100 - return workspace; 101 - 102 - fail_kmalloc: 103 - vfree(workspace->inf_strm.workspace); 104 - fail_inflate: 105 - vfree(workspace->def_strm.workspace); 106 - fail: 107 - kfree(workspace); 108 - atomic_dec(&alloc_workspace); 109 - wake_up(&workspace_wait); 110 - return ERR_PTR(ret); 111 - } 112 - 113 - /* 114 - * put a workspace struct back on the list or free it if we have enough 115 - * idle ones sitting around 116 - */ 117 - static int free_workspace(struct workspace *workspace) 118 - { 119 - spin_lock(&workspace_lock); 120 - if (num_workspace < num_online_cpus()) { 121 - list_add_tail(&workspace->list, &idle_workspace); 122 - num_workspace++; 123 - spin_unlock(&workspace_lock); 124 - if (waitqueue_active(&workspace_wait)) 125 - wake_up(&workspace_wait); 126 - return 0; 127 - } 128 - spin_unlock(&workspace_lock); 129 vfree(workspace->def_strm.workspace); 130 vfree(workspace->inf_strm.workspace); 131 kfree(workspace->buf); 132 kfree(workspace); 133 - 134 - atomic_dec(&alloc_workspace); 135 - if (waitqueue_active(&workspace_wait)) 136 - wake_up(&workspace_wait); 137 - return 0; 138 } 139 140 - /* 141 - * cleanup function for module exit 142 - */ 143 - static void free_workspaces(void) 144 { 145 struct workspace *workspace; 146 - while (!list_empty(&idle_workspace)) { 147 - 
workspace = list_entry(idle_workspace.next, struct workspace, 148 - list); 149 - list_del(&workspace->list); 150 - vfree(workspace->def_strm.workspace); 151 - vfree(workspace->inf_strm.workspace); 152 - kfree(workspace->buf); 153 - kfree(workspace); 154 - atomic_dec(&alloc_workspace); 155 - } 156 } 157 158 - /* 159 - * given an address space and start/len, compress the bytes. 160 - * 161 - * pages are allocated to hold the compressed result and stored 162 - * in 'pages' 163 - * 164 - * out_pages is used to return the number of pages allocated. There 165 - * may be pages allocated even if we return an error 166 - * 167 - * total_in is used to return the number of bytes actually read. It 168 - * may be smaller then len if we had to exit early because we 169 - * ran out of room in the pages array or because we cross the 170 - * max_out threshold. 171 - * 172 - * total_out is used to return the total number of compressed bytes 173 - * 174 - * max_out tells us the max number of bytes that we're allowed to 175 - * stuff into pages 176 - */ 177 - int btrfs_zlib_compress_pages(struct address_space *mapping, 178 - u64 start, unsigned long len, 179 - struct page **pages, 180 - unsigned long nr_dest_pages, 181 - unsigned long *out_pages, 182 - unsigned long *total_in, 183 - unsigned long *total_out, 184 - unsigned long max_out) 185 { 186 int ret; 187 - struct workspace *workspace; 188 char *data_in; 189 char *cpage_out; 190 int nr_pages = 0; ··· 94 *out_pages = 0; 95 *total_out = 0; 96 *total_in = 0; 97 - 98 - workspace = find_zlib_workspace(); 99 - if (IS_ERR(workspace)) 100 - return -1; 101 102 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 103 printk(KERN_WARNING "deflateInit failed\n"); ··· 108 data_in = kmap(in_page); 109 110 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 111 cpage_out = kmap(out_page); 112 pages[0] = out_page; 113 nr_pages = 1; ··· 150 goto out; 151 } 152 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 153 cpage_out = kmap(out_page); 154 pages[nr_pages] = out_page; 155 nr_pages++; ··· 208 kunmap(in_page); 209 page_cache_release(in_page); 210 } 211 - free_workspace(workspace); 212 return ret; 213 } 214 215 - /* 216 - * pages_in is an array of pages with compressed data. 217 - * 218 - * disk_start is the starting logical offset of this array in the file 219 - * 220 - * bvec is a bio_vec of pages from the file that we want to decompress into 221 - * 222 - * vcnt is the count of pages in the biovec 223 - * 224 - * srclen is the number of bytes in pages_in 225 - * 226 - * The basic idea is that we have a bio that was created by readpages. 227 - * The pages in the bio are for the uncompressed data, and they may not 228 - * be contiguous. They all correspond to the range of bytes covered by 229 - * the compressed extent. 
230 - */ 231 - int btrfs_zlib_decompress_biovec(struct page **pages_in, 232 - u64 disk_start, 233 - struct bio_vec *bvec, 234 - int vcnt, 235 - size_t srclen) 236 { 237 - int ret = 0; 238 int wbits = MAX_WBITS; 239 - struct workspace *workspace; 240 char *data_in; 241 size_t total_out = 0; 242 - unsigned long page_bytes_left; 243 unsigned long page_in_index = 0; 244 unsigned long page_out_index = 0; 245 - struct page *page_out; 246 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / 247 PAGE_CACHE_SIZE; 248 unsigned long buf_start; 249 - unsigned long buf_offset; 250 - unsigned long bytes; 251 - unsigned long working_bytes; 252 unsigned long pg_offset; 253 - unsigned long start_byte; 254 - unsigned long current_buf_start; 255 - char *kaddr; 256 - 257 - workspace = find_zlib_workspace(); 258 - if (IS_ERR(workspace)) 259 - return -ENOMEM; 260 261 data_in = kmap(pages_in[page_in_index]); 262 workspace->inf_strm.next_in = data_in; ··· 237 workspace->inf_strm.total_out = 0; 238 workspace->inf_strm.next_out = workspace->buf; 239 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 240 - page_out = bvec[page_out_index].bv_page; 241 - page_bytes_left = PAGE_CACHE_SIZE; 242 pg_offset = 0; 243 244 /* If it's deflate, and it's got no preset dictionary, then ··· 252 253 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 254 printk(KERN_WARNING "inflateInit failed\n"); 255 - ret = -1; 256 - goto out; 257 } 258 while (workspace->inf_strm.total_in < srclen) { 259 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); 260 if (ret != Z_OK && ret != Z_STREAM_END) 261 break; 262 - /* 263 - * buf start is the byte offset we're of the start of 264 - * our workspace buffer 265 - */ 266 - buf_start = total_out; 267 268 - /* total_out is the last byte of the workspace buffer */ 269 total_out = workspace->inf_strm.total_out; 270 271 - working_bytes = total_out - buf_start; 272 - 273 - /* 274 - * start byte is the first byte of the page we're currently 275 - * copying into relative to the start of the compressed data. 
276 - */ 277 - start_byte = page_offset(page_out) - disk_start; 278 - 279 - if (working_bytes == 0) { 280 - /* we didn't make progress in this inflate 281 - * call, we're done 282 - */ 283 - if (ret != Z_STREAM_END) 284 - ret = -1; 285 break; 286 } 287 288 - /* we haven't yet hit data corresponding to this page */ 289 - if (total_out <= start_byte) 290 - goto next; 291 - 292 - /* 293 - * the start of the data we care about is offset into 294 - * the middle of our working buffer 295 - */ 296 - if (total_out > start_byte && buf_start < start_byte) { 297 - buf_offset = start_byte - buf_start; 298 - working_bytes -= buf_offset; 299 - } else { 300 - buf_offset = 0; 301 - } 302 - current_buf_start = buf_start; 303 - 304 - /* copy bytes from the working buffer into the pages */ 305 - while (working_bytes > 0) { 306 - bytes = min(PAGE_CACHE_SIZE - pg_offset, 307 - PAGE_CACHE_SIZE - buf_offset); 308 - bytes = min(bytes, working_bytes); 309 - kaddr = kmap_atomic(page_out, KM_USER0); 310 - memcpy(kaddr + pg_offset, workspace->buf + buf_offset, 311 - bytes); 312 - kunmap_atomic(kaddr, KM_USER0); 313 - flush_dcache_page(page_out); 314 - 315 - pg_offset += bytes; 316 - page_bytes_left -= bytes; 317 - buf_offset += bytes; 318 - working_bytes -= bytes; 319 - current_buf_start += bytes; 320 - 321 - /* check if we need to pick another page */ 322 - if (page_bytes_left == 0) { 323 - page_out_index++; 324 - if (page_out_index >= vcnt) { 325 - ret = 0; 326 - goto done; 327 - } 328 - 329 - page_out = bvec[page_out_index].bv_page; 330 - pg_offset = 0; 331 - page_bytes_left = PAGE_CACHE_SIZE; 332 - start_byte = page_offset(page_out) - disk_start; 333 - 334 - /* 335 - * make sure our new page is covered by this 336 - * working buffer 337 - */ 338 - if (total_out <= start_byte) 339 - goto next; 340 - 341 - /* the next page in the biovec might not 342 - * be adjacent to the last page, but it 343 - * might still be found inside this working 344 - * buffer. bump our offset pointer 345 - */ 346 - if (total_out > start_byte && 347 - current_buf_start < start_byte) { 348 - buf_offset = start_byte - buf_start; 349 - working_bytes = total_out - start_byte; 350 - current_buf_start = buf_start + 351 - buf_offset; 352 - } 353 - } 354 - } 355 - next: 356 workspace->inf_strm.next_out = workspace->buf; 357 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 358 ··· 301 zlib_inflateEnd(&workspace->inf_strm); 302 if (data_in) 303 kunmap(pages_in[page_in_index]); 304 - out: 305 - free_workspace(workspace); 306 return ret; 307 } 308 309 - /* 310 - * a less complex decompression routine. Our compressed data fits in a 311 - * single page, and we want to read a single page out of it. 
312 - * start_byte tells us the offset into the compressed data we're interested in 313 - */ 314 - int btrfs_zlib_decompress(unsigned char *data_in, 315 - struct page *dest_page, 316 - unsigned long start_byte, 317 - size_t srclen, size_t destlen) 318 { 319 int ret = 0; 320 int wbits = MAX_WBITS; 321 - struct workspace *workspace; 322 unsigned long bytes_left = destlen; 323 unsigned long total_out = 0; 324 char *kaddr; 325 - 326 - if (destlen > PAGE_CACHE_SIZE) 327 - return -ENOMEM; 328 - 329 - workspace = find_zlib_workspace(); 330 - if (IS_ERR(workspace)) 331 - return -ENOMEM; 332 333 workspace->inf_strm.next_in = data_in; 334 workspace->inf_strm.avail_in = srclen; ··· 336 337 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 338 printk(KERN_WARNING "inflateInit failed\n"); 339 - ret = -1; 340 - goto out; 341 } 342 343 while (bytes_left > 0) { ··· 386 ret = 0; 387 388 zlib_inflateEnd(&workspace->inf_strm); 389 - out: 390 - free_workspace(workspace); 391 return ret; 392 } 393 394 - void btrfs_zlib_exit(void) 395 - { 396 - free_workspaces(); 397 - }
··· 32 #include <linux/bio.h> 33 #include "compression.h" 34 35 struct workspace { 36 z_stream inf_strm; 37 z_stream def_strm; ··· 48 struct list_head list; 49 }; 50 51 + static void zlib_free_workspace(struct list_head *ws) 52 { 53 + struct workspace *workspace = list_entry(ws, struct workspace, list); 54 55 vfree(workspace->def_strm.workspace); 56 vfree(workspace->inf_strm.workspace); 57 kfree(workspace->buf); 58 kfree(workspace); 59 } 60 61 + static struct list_head *zlib_alloc_workspace(void) 62 { 63 struct workspace *workspace; 64 + 65 + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 66 + if (!workspace) 67 + return ERR_PTR(-ENOMEM); 68 + 69 + workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 70 + workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 71 + workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 72 + if (!workspace->def_strm.workspace || 73 + !workspace->inf_strm.workspace || !workspace->buf) 74 + goto fail; 75 + 76 + INIT_LIST_HEAD(&workspace->list); 77 + 78 + return &workspace->list; 79 + fail: 80 + zlib_free_workspace(&workspace->list); 81 + return ERR_PTR(-ENOMEM); 82 } 83 84 + static int zlib_compress_pages(struct list_head *ws, 85 + struct address_space *mapping, 86 + u64 start, unsigned long len, 87 + struct page **pages, 88 + unsigned long nr_dest_pages, 89 + unsigned long *out_pages, 90 + unsigned long *total_in, 91 + unsigned long *total_out, 92 + unsigned long max_out) 93 { 94 + struct workspace *workspace = list_entry(ws, struct workspace, list); 95 int ret; 96 char *data_in; 97 char *cpage_out; 98 int nr_pages = 0; ··· 204 *out_pages = 0; 205 *total_out = 0; 206 *total_in = 0; 207 208 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 209 printk(KERN_WARNING "deflateInit failed\n"); ··· 222 data_in = kmap(in_page); 223 224 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 225 + if (out_page == NULL) { 226 + ret = -1; 227 + goto out; 228 + } 229 cpage_out = kmap(out_page); 230 pages[0] = out_page; 231 nr_pages = 1; ··· 260 goto out; 261 } 262 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 263 + if (out_page == NULL) { 264 + ret = -1; 265 + goto out; 266 + } 267 cpage_out = kmap(out_page); 268 pages[nr_pages] = out_page; 269 nr_pages++; ··· 314 kunmap(in_page); 315 page_cache_release(in_page); 316 } 317 return ret; 318 } 319 320 + static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, 321 + u64 disk_start, 322 + struct bio_vec *bvec, 323 + int vcnt, 324 + size_t srclen) 325 { 326 + struct workspace *workspace = list_entry(ws, struct workspace, list); 327 + int ret = 0, ret2; 328 int wbits = MAX_WBITS; 329 char *data_in; 330 size_t total_out = 0; 331 unsigned long page_in_index = 0; 332 unsigned long page_out_index = 0; 333 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / 334 PAGE_CACHE_SIZE; 335 unsigned long buf_start; 336 unsigned long pg_offset; 337 338 data_in = kmap(pages_in[page_in_index]); 339 workspace->inf_strm.next_in = data_in; ··· 372 workspace->inf_strm.total_out = 0; 373 workspace->inf_strm.next_out = workspace->buf; 374 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 375 pg_offset = 0; 376 377 /* If it's deflate, and it's got no preset dictionary, then ··· 389 390 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 391 printk(KERN_WARNING "inflateInit failed\n"); 392 + return -1; 393 } 394 while (workspace->inf_strm.total_in < srclen) { 395 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); 396 if (ret != Z_OK && ret != Z_STREAM_END) 397 break; 
398 399 + buf_start = total_out; 400 total_out = workspace->inf_strm.total_out; 401 402 + /* we didn't make progress in this inflate call, we're done */ 403 + if (buf_start == total_out) 404 break; 405 + 406 + ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, 407 + total_out, disk_start, 408 + bvec, vcnt, 409 + &page_out_index, &pg_offset); 410 + if (ret2 == 0) { 411 + ret = 0; 412 + goto done; 413 } 414 415 workspace->inf_strm.next_out = workspace->buf; 416 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 417 ··· 516 zlib_inflateEnd(&workspace->inf_strm); 517 if (data_in) 518 kunmap(pages_in[page_in_index]); 519 return ret; 520 } 521 522 + static int zlib_decompress(struct list_head *ws, unsigned char *data_in, 523 + struct page *dest_page, 524 + unsigned long start_byte, 525 + size_t srclen, size_t destlen) 526 { 527 + struct workspace *workspace = list_entry(ws, struct workspace, list); 528 int ret = 0; 529 int wbits = MAX_WBITS; 530 unsigned long bytes_left = destlen; 531 unsigned long total_out = 0; 532 char *kaddr; 533 534 workspace->inf_strm.next_in = data_in; 535 workspace->inf_strm.avail_in = srclen; ··· 565 566 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 567 printk(KERN_WARNING "inflateInit failed\n"); 568 + return -1; 569 } 570 571 while (bytes_left > 0) { ··· 616 ret = 0; 617 618 zlib_inflateEnd(&workspace->inf_strm); 619 return ret; 620 } 621 622 + struct btrfs_compress_op btrfs_zlib_compress = { 623 + .alloc_workspace = zlib_alloc_workspace, 624 + .free_workspace = zlib_free_workspace, 625 + .compress_pages = zlib_compress_pages, 626 + .decompress_biovec = zlib_decompress_biovec, 627 + .decompress = zlib_decompress, 628 + };
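After this rewrite zlib.c no longer manages workspaces or copies decompressed bytes into the bio pages; it only fills in a btrfs_compress_op, and the shared code (together with the new lzo.c from the same series) is expected to keep one workspace cache per algorithm and to dispatch through such a table. The fragment below sketches that dispatch; the table name, the helper and the 1-based type indexing are assumptions made for illustration, not quotes from compression.c.

/* sketch only: one entry per compression algorithm */
static struct btrfs_compress_op *compress_ops[] = {
        &btrfs_zlib_compress,           /* zlib, on-disk type 1 (assumed) */
        &btrfs_lzo_compress,            /* from the new lzo.c */
};

static int compress_pages_by_type(int type, struct list_head *workspace,
                                  struct address_space *mapping, u64 start,
                                  unsigned long len, struct page **pages,
                                  unsigned long nr_dest_pages,
                                  unsigned long *out_pages,
                                  unsigned long *total_in,
                                  unsigned long *total_out,
                                  unsigned long max_out)
{
        /* on-disk compression types start at 1, hence the -1 (assumed) */
        return compress_ops[type - 1]->compress_pages(workspace, mapping,
                                                      start, len, pages,
                                                      nr_dest_pages,
                                                      out_pages, total_in,
                                                      total_out, max_out);
}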