Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (39 commits)
Btrfs: deal with errors from updating the tree log
Btrfs: allow subvol deletion by unprivileged user with -o user_subvol_rm_allowed
Btrfs: make SNAP_DESTROY async
Btrfs: add SNAP_CREATE_ASYNC ioctl
Btrfs: add START_SYNC, WAIT_SYNC ioctls
Btrfs: async transaction commit
Btrfs: fix deadlock in btrfs_commit_transaction
Btrfs: fix lockdep warning on clone ioctl
Btrfs: fix clone ioctl where range is adjacent to extent
Btrfs: fix delalloc checks in clone ioctl
Btrfs: drop unused variable in block_alloc_rsv
Btrfs: cleanup warnings from gcc 4.6 (nonbugs)
Btrfs: Fix variables set but not read (bugs found by gcc 4.6)
Btrfs: Use ERR_CAST helpers
Btrfs: use memdup_user helpers
Btrfs: fix raid code for removing missing drives
Btrfs: Switch the extent buffer rbtree into a radix tree
Btrfs: restructure try_release_extent_buffer()
Btrfs: use the flusher threads for delalloc throttling
Btrfs: tune the chunk allocation to 5% of the FS as metadata
...

Fix up trivial conflicts in fs/btrfs/super.c and fs/fs-writeback.c, and
remove use of INIT_RCU_HEAD in fs/btrfs/extent_io.c (that init macro was
useless and removed in commit 5e8067adfdba: "rcu head remove init")

+2411 -525
fs/btrfs/compression.c  -2
@@ -163,7 +163,6 @@
  */
 static void end_compressed_bio_read(struct bio *bio, int err)
 {
-	struct extent_io_tree *tree;
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
@@ -186,7 +187,6 @@
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	tree = &BTRFS_I(inode)->io_tree;
 	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
 					   cb->start,
 					   cb->orig_bio->bi_io_vec,
fs/btrfs/ctree.c  +30 -27
··· 200 struct extent_buffer **cow_ret, u64 new_root_objectid) 201 { 202 struct extent_buffer *cow; 203 - u32 nritems; 204 int ret = 0; 205 int level; 206 struct btrfs_disk_key disk_key; ··· 209 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 210 211 level = btrfs_header_level(buf); 212 - nritems = btrfs_header_nritems(buf); 213 if (level == 0) 214 btrfs_item_key(buf, &disk_key, 0); 215 else ··· 1006 int wret; 1007 int pslot; 1008 int orig_slot = path->slots[level]; 1009 - int err_on_enospc = 0; 1010 u64 orig_ptr; 1011 1012 if (level == 0) ··· 1068 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1069 return 0; 1070 1071 - if (btrfs_header_nritems(mid) < 2) 1072 - err_on_enospc = 1; 1073 1074 left = read_node_slot(root, parent, pslot - 1); 1075 if (left) { ··· 1099 wret = push_node_left(trans, root, left, mid, 1); 1100 if (wret < 0) 1101 ret = wret; 1102 - if (btrfs_header_nritems(mid) < 2) 1103 - err_on_enospc = 1; 1104 } 1105 1106 /* ··· 1219 int wret; 1220 int pslot; 1221 int orig_slot = path->slots[level]; 1222 - u64 orig_ptr; 1223 1224 if (level == 0) 1225 return 1; 1226 1227 mid = path->nodes[level]; 1228 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1229 - orig_ptr = btrfs_node_blockptr(mid, orig_slot); 1230 1231 if (level < BTRFS_MAX_LEVEL - 1) 1232 parent = path->nodes[level + 1]; ··· 1570 blocksize = btrfs_level_size(root, level - 1); 1571 1572 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1573 - if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1574 - /* 1575 - * we found an up to date block without sleeping, return 1576 - * right away 1577 - */ 1578 - *eb_ret = tmp; 1579 - return 0; 1580 } 1581 1582 /* ··· 1609 btrfs_unlock_up_safe(p, level + 1); 1610 btrfs_set_path_blocking(p); 1611 1612 - if (tmp) 1613 - free_extent_buffer(tmp); 1614 if (p->reada) 1615 reada_for_search(root, p, level, slot, key->objectid); 1616 ··· 2560 { 2561 struct btrfs_disk_key disk_key; 2562 struct extent_buffer *right = path->nodes[0]; 2563 - int slot; 2564 int i; 2565 int push_space = 0; 2566 int push_items = 0; ··· 2570 int wret; 2571 u32 this_item_size; 2572 u32 old_left_item_size; 2573 - 2574 - slot = path->slots[1]; 2575 2576 if (empty) 2577 nr = min(right_nritems, max_slot); ··· 3339 { 3340 int ret = 0; 3341 int slot; 3342 - int slot_orig; 3343 struct extent_buffer *leaf; 3344 struct btrfs_item *item; 3345 u32 nritems; ··· 3348 unsigned int size_diff; 3349 int i; 3350 3351 - slot_orig = path->slots[0]; 3352 leaf = path->nodes[0]; 3353 slot = path->slots[0]; 3354 ··· 3452 { 3453 int ret = 0; 3454 int slot; 3455 - int slot_orig; 3456 struct extent_buffer *leaf; 3457 struct btrfs_item *item; 3458 u32 nritems; ··· 3460 unsigned int old_size; 3461 int i; 3462 3463 - slot_orig = path->slots[0]; 3464 leaf = path->nodes[0]; 3465 3466 nritems = btrfs_header_nritems(leaf); ··· 3792 struct btrfs_key *cpu_key, u32 *data_size, 3793 int nr) 3794 { 3795 - struct extent_buffer *leaf; 3796 int ret = 0; 3797 int slot; 3798 int i; ··· 3808 if (ret < 0) 3809 goto out; 3810 3811 - leaf = path->nodes[0]; 3812 slot = path->slots[0]; 3813 BUG_ON(slot < 0); 3814
··· 200 struct extent_buffer **cow_ret, u64 new_root_objectid) 201 { 202 struct extent_buffer *cow; 203 int ret = 0; 204 int level; 205 struct btrfs_disk_key disk_key; ··· 210 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 211 212 level = btrfs_header_level(buf); 213 if (level == 0) 214 btrfs_item_key(buf, &disk_key, 0); 215 else ··· 1008 int wret; 1009 int pslot; 1010 int orig_slot = path->slots[level]; 1011 u64 orig_ptr; 1012 1013 if (level == 0) ··· 1071 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1072 return 0; 1073 1074 + btrfs_header_nritems(mid); 1075 1076 left = read_node_slot(root, parent, pslot - 1); 1077 if (left) { ··· 1103 wret = push_node_left(trans, root, left, mid, 1); 1104 if (wret < 0) 1105 ret = wret; 1106 + btrfs_header_nritems(mid); 1107 } 1108 1109 /* ··· 1224 int wret; 1225 int pslot; 1226 int orig_slot = path->slots[level]; 1227 1228 if (level == 0) 1229 return 1; 1230 1231 mid = path->nodes[level]; 1232 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1233 1234 if (level < BTRFS_MAX_LEVEL - 1) 1235 parent = path->nodes[level + 1]; ··· 1577 blocksize = btrfs_level_size(root, level - 1); 1578 1579 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1580 + if (tmp) { 1581 + if (btrfs_buffer_uptodate(tmp, 0)) { 1582 + if (btrfs_buffer_uptodate(tmp, gen)) { 1583 + /* 1584 + * we found an up to date block without 1585 + * sleeping, return 1586 + * right away 1587 + */ 1588 + *eb_ret = tmp; 1589 + return 0; 1590 + } 1591 + /* the pages were up to date, but we failed 1592 + * the generation number check. Do a full 1593 + * read for the generation number that is correct. 1594 + * We must do this without dropping locks so 1595 + * we can trust our generation number 1596 + */ 1597 + free_extent_buffer(tmp); 1598 + tmp = read_tree_block(root, blocknr, blocksize, gen); 1599 + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1600 + *eb_ret = tmp; 1601 + return 0; 1602 + } 1603 + free_extent_buffer(tmp); 1604 + btrfs_release_path(NULL, p); 1605 + return -EIO; 1606 + } 1607 } 1608 1609 /* ··· 1596 btrfs_unlock_up_safe(p, level + 1); 1597 btrfs_set_path_blocking(p); 1598 1599 + free_extent_buffer(tmp); 1600 if (p->reada) 1601 reada_for_search(root, p, level, slot, key->objectid); 1602 ··· 2548 { 2549 struct btrfs_disk_key disk_key; 2550 struct extent_buffer *right = path->nodes[0]; 2551 int i; 2552 int push_space = 0; 2553 int push_items = 0; ··· 2559 int wret; 2560 u32 this_item_size; 2561 u32 old_left_item_size; 2562 2563 if (empty) 2564 nr = min(right_nritems, max_slot); ··· 3330 { 3331 int ret = 0; 3332 int slot; 3333 struct extent_buffer *leaf; 3334 struct btrfs_item *item; 3335 u32 nritems; ··· 3340 unsigned int size_diff; 3341 int i; 3342 3343 leaf = path->nodes[0]; 3344 slot = path->slots[0]; 3345 ··· 3445 { 3446 int ret = 0; 3447 int slot; 3448 struct extent_buffer *leaf; 3449 struct btrfs_item *item; 3450 u32 nritems; ··· 3454 unsigned int old_size; 3455 int i; 3456 3457 leaf = path->nodes[0]; 3458 3459 nritems = btrfs_header_nritems(leaf); ··· 3787 struct btrfs_key *cpu_key, u32 *data_size, 3788 int nr) 3789 { 3790 int ret = 0; 3791 int slot; 3792 int i; ··· 3804 if (ret < 0) 3805 goto out; 3806 3807 slot = path->slots[0]; 3808 BUG_ON(slot < 0); 3809
fs/btrfs/ctree.h  +89 -11
··· 99 */ 100 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL 101 102 /* dummy objectid represents multiple objectids */ 103 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL 104 ··· 268 /* additional stripes go here */ 269 } __attribute__ ((__packed__)); 270 271 static inline unsigned long btrfs_chunk_item_size(int num_stripes) 272 { 273 BUG_ON(num_stripes == 0); ··· 384 385 char label[BTRFS_LABEL_SIZE]; 386 387 /* future expansion */ 388 - __le64 reserved[32]; 389 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 390 } __attribute__ ((__packed__)); 391 ··· 396 * ones specified below then we will fail to mount 397 */ 398 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 399 - #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) 400 401 #define BTRFS_FEATURE_COMPAT_SUPP 0ULL 402 #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 403 - #define BTRFS_FEATURE_INCOMPAT_SUPP \ 404 - (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 405 - BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) 406 407 /* 408 * A leaf is full of items. offset and size tell us where to find ··· 698 struct btrfs_space_info { 699 u64 flags; 700 701 - u64 total_bytes; /* total bytes in the space */ 702 u64 bytes_used; /* total bytes used, 703 this does't take mirrors into account */ 704 u64 bytes_pinned; /* total bytes pinned, will be freed when the ··· 711 u64 bytes_may_use; /* number of bytes that may be used for 712 delalloc/allocations */ 713 u64 disk_used; /* total bytes used on disk */ 714 715 int full; /* indicates that we cannot allocate any more 716 chunks for this space */ ··· 776 BTRFS_CACHE_FINISHED = 2, 777 }; 778 779 struct btrfs_caching_control { 780 struct list_head list; 781 struct mutex mutex; ··· 797 struct btrfs_key key; 798 struct btrfs_block_group_item item; 799 struct btrfs_fs_info *fs_info; 800 spinlock_t lock; 801 u64 pinned; 802 u64 reserved; ··· 808 int extents_thresh; 809 int free_extents; 810 int total_bitmaps; 811 - int ro; 812 - int dirty; 813 814 /* cache tracking stuff */ 815 int cached; ··· 901 struct btrfs_transaction *running_transaction; 902 wait_queue_head_t transaction_throttle; 903 wait_queue_head_t transaction_wait; 904 wait_queue_head_t async_submit_wait; 905 906 struct btrfs_super_block super_copy; ··· 988 struct btrfs_workers endio_meta_workers; 989 struct btrfs_workers endio_meta_write_workers; 990 struct btrfs_workers endio_write_workers; 991 struct btrfs_workers submit_workers; 992 /* 993 * fixup workers take dirty pages that didn't properly go through ··· 1232 #define BTRFS_MOUNT_NOSSD (1 << 9) 1233 #define BTRFS_MOUNT_DISCARD (1 << 10) 1234 #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) 1235 1236 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1237 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) ··· 1708 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1709 } 1710 1711 /* struct btrfs_disk_key */ 1712 BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 1713 objectid, 64); ··· 1940 incompat_flags, 64); 1941 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1942 csum_type, 16); 1943 1944 static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 1945 { ··· 2054 return file->f_path.dentry; 2055 } 2056 2057 /* extent-tree.c */ 2058 void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2059 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, ··· 2151 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 2152 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 2153 struct btrfs_root 
*root, 2154 - int num_items, int *retries); 2155 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 2156 struct btrfs_root *root); 2157 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, ··· 2172 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 2173 struct btrfs_root *root, 2174 struct btrfs_block_rsv *block_rsv, 2175 - u64 num_bytes, int *retries); 2176 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 2177 struct btrfs_root *root, 2178 struct btrfs_block_rsv *block_rsv, ··· 2187 struct btrfs_block_group_cache *cache); 2188 int btrfs_set_block_group_rw(struct btrfs_root *root, 2189 struct btrfs_block_group_cache *cache); 2190 /* ctree.c */ 2191 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2192 int level, int *slot); ··· 2446 u32 min_type); 2447 2448 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2449 - int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); 2450 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2451 struct extent_state **cached_state); 2452 int btrfs_writepages(struct address_space *mapping, ··· 2500 int btrfs_prealloc_file_range(struct inode *inode, int mode, 2501 u64 start, u64 num_bytes, u64 min_size, 2502 loff_t actual_len, u64 *alloc_hint); 2503 extern const struct dentry_operations btrfs_dentry_operations; 2504 2505 /* ioctl.c */
··· 99 */ 100 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL 101 102 + /* For storing free space cache */ 103 + #define BTRFS_FREE_SPACE_OBJECTID -11ULL 104 + 105 /* dummy objectid represents multiple objectids */ 106 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL 107 ··· 265 /* additional stripes go here */ 266 } __attribute__ ((__packed__)); 267 268 + #define BTRFS_FREE_SPACE_EXTENT 1 269 + #define BTRFS_FREE_SPACE_BITMAP 2 270 + 271 + struct btrfs_free_space_entry { 272 + __le64 offset; 273 + __le64 bytes; 274 + u8 type; 275 + } __attribute__ ((__packed__)); 276 + 277 + struct btrfs_free_space_header { 278 + struct btrfs_disk_key location; 279 + __le64 generation; 280 + __le64 num_entries; 281 + __le64 num_bitmaps; 282 + } __attribute__ ((__packed__)); 283 + 284 static inline unsigned long btrfs_chunk_item_size(int num_stripes) 285 { 286 BUG_ON(num_stripes == 0); ··· 365 366 char label[BTRFS_LABEL_SIZE]; 367 368 + __le64 cache_generation; 369 + 370 /* future expansion */ 371 + __le64 reserved[31]; 372 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 373 } __attribute__ ((__packed__)); 374 ··· 375 * ones specified below then we will fail to mount 376 */ 377 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 378 + #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 379 + #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 380 381 #define BTRFS_FEATURE_COMPAT_SUPP 0ULL 382 #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 383 + #define BTRFS_FEATURE_INCOMPAT_SUPP \ 384 + (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 385 + BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 386 + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 387 388 /* 389 * A leaf is full of items. offset and size tell us where to find ··· 675 struct btrfs_space_info { 676 u64 flags; 677 678 + u64 total_bytes; /* total bytes in the space, 679 + this doesn't take mirrors into account */ 680 u64 bytes_used; /* total bytes used, 681 this does't take mirrors into account */ 682 u64 bytes_pinned; /* total bytes pinned, will be freed when the ··· 687 u64 bytes_may_use; /* number of bytes that may be used for 688 delalloc/allocations */ 689 u64 disk_used; /* total bytes used on disk */ 690 + u64 disk_total; /* total bytes on disk, takes mirrors into 691 + account */ 692 693 int full; /* indicates that we cannot allocate any more 694 chunks for this space */ ··· 750 BTRFS_CACHE_FINISHED = 2, 751 }; 752 753 + enum btrfs_disk_cache_state { 754 + BTRFS_DC_WRITTEN = 0, 755 + BTRFS_DC_ERROR = 1, 756 + BTRFS_DC_CLEAR = 2, 757 + BTRFS_DC_SETUP = 3, 758 + BTRFS_DC_NEED_WRITE = 4, 759 + }; 760 + 761 struct btrfs_caching_control { 762 struct list_head list; 763 struct mutex mutex; ··· 763 struct btrfs_key key; 764 struct btrfs_block_group_item item; 765 struct btrfs_fs_info *fs_info; 766 + struct inode *inode; 767 spinlock_t lock; 768 u64 pinned; 769 u64 reserved; ··· 773 int extents_thresh; 774 int free_extents; 775 int total_bitmaps; 776 + int ro:1; 777 + int dirty:1; 778 + int iref:1; 779 + 780 + int disk_cache_state; 781 782 /* cache tracking stuff */ 783 int cached; ··· 863 struct btrfs_transaction *running_transaction; 864 wait_queue_head_t transaction_throttle; 865 wait_queue_head_t transaction_wait; 866 + wait_queue_head_t transaction_blocked_wait; 867 wait_queue_head_t async_submit_wait; 868 869 struct btrfs_super_block super_copy; ··· 949 struct btrfs_workers endio_meta_workers; 950 struct btrfs_workers endio_meta_write_workers; 951 struct btrfs_workers endio_write_workers; 952 + struct btrfs_workers endio_freespace_worker; 953 struct btrfs_workers 
submit_workers; 954 /* 955 * fixup workers take dirty pages that didn't properly go through ··· 1192 #define BTRFS_MOUNT_NOSSD (1 << 9) 1193 #define BTRFS_MOUNT_DISCARD (1 << 10) 1194 #define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) 1195 + #define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1196 + #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1197 + #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1198 1199 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1200 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) ··· 1665 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1666 } 1667 1668 + BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, 1669 + num_entries, 64); 1670 + BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, 1671 + num_bitmaps, 64); 1672 + BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, 1673 + generation, 64); 1674 + 1675 + static inline void btrfs_free_space_key(struct extent_buffer *eb, 1676 + struct btrfs_free_space_header *h, 1677 + struct btrfs_disk_key *key) 1678 + { 1679 + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); 1680 + } 1681 + 1682 + static inline void btrfs_set_free_space_key(struct extent_buffer *eb, 1683 + struct btrfs_free_space_header *h, 1684 + struct btrfs_disk_key *key) 1685 + { 1686 + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); 1687 + } 1688 + 1689 /* struct btrfs_disk_key */ 1690 BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 1691 objectid, 64); ··· 1876 incompat_flags, 64); 1877 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1878 csum_type, 16); 1879 + BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, 1880 + cache_generation, 64); 1881 1882 static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 1883 { ··· 1988 return file->f_path.dentry; 1989 } 1990 1991 + static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) 1992 + { 1993 + return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && 1994 + (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); 1995 + } 1996 + 1997 /* extent-tree.c */ 1998 void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 1999 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, ··· 2079 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 2080 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 2081 struct btrfs_root *root, 2082 + int num_items); 2083 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 2084 struct btrfs_root *root); 2085 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, ··· 2100 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 2101 struct btrfs_root *root, 2102 struct btrfs_block_rsv *block_rsv, 2103 + u64 num_bytes); 2104 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 2105 struct btrfs_root *root, 2106 struct btrfs_block_rsv *block_rsv, ··· 2115 struct btrfs_block_group_cache *cache); 2116 int btrfs_set_block_group_rw(struct btrfs_root *root, 2117 struct btrfs_block_group_cache *cache); 2118 + void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2119 /* ctree.c */ 2120 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2121 int level, int *slot); ··· 2373 u32 min_type); 2374 2375 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2376 + int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput, 2377 + int sync); 
2378 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2379 struct extent_state **cached_state); 2380 int btrfs_writepages(struct address_space *mapping, ··· 2426 int btrfs_prealloc_file_range(struct inode *inode, int mode, 2427 u64 start, u64 num_bytes, u64 min_size, 2428 loff_t actual_len, u64 *alloc_hint); 2429 + int btrfs_prealloc_file_range_trans(struct inode *inode, 2430 + struct btrfs_trans_handle *trans, int mode, 2431 + u64 start, u64 num_bytes, u64 min_size, 2432 + loff_t actual_len, u64 *alloc_hint); 2433 extern const struct dentry_operations btrfs_dentry_operations; 2434 2435 /* ioctl.c */
fs/btrfs/dir-item.c  +1 -1
@@ -427,5 +427,5 @@
 		ret = btrfs_truncate_item(trans, root, path,
 					  item_len - sub_item_len, 1);
 	}
-	return 0;
+	return ret;
 }
fs/btrfs/disk-io.c  +19 -13
··· 338 struct extent_io_tree *tree; 339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 340 u64 found_start; 341 - int found_level; 342 unsigned long len; 343 struct extent_buffer *eb; 344 int ret; ··· 368 WARN_ON(1); 369 goto err; 370 } 371 - found_level = btrfs_header_level(eb); 372 - 373 csum_tree_block(root, eb, 0); 374 err: 375 free_extent_buffer(eb); ··· 478 end_io_wq->work.flags = 0; 479 480 if (bio->bi_rw & REQ_WRITE) { 481 - if (end_io_wq->metadata) 482 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 483 &end_io_wq->work); 484 else 485 btrfs_queue_worker(&fs_info->endio_write_workers, ··· 497 } 498 } 499 500 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 501 int metadata) 502 { ··· 540 541 static void run_one_async_start(struct btrfs_work *work) 542 { 543 - struct btrfs_fs_info *fs_info; 544 struct async_submit_bio *async; 545 546 async = container_of(work, struct async_submit_bio, work); 547 - fs_info = BTRFS_I(async->inode)->root->fs_info; 548 async->submit_bio_start(async->inode, async->rw, async->bio, 549 async->mirror_num, async->bio_flags, 550 async->bio_offset); ··· 855 u32 blocksize, u64 parent_transid) 856 { 857 struct extent_buffer *buf = NULL; 858 - struct inode *btree_inode = root->fs_info->btree_inode; 859 - struct extent_io_tree *io_tree; 860 int ret; 861 - 862 - io_tree = &BTRFS_I(btree_inode)->io_tree; 863 864 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 865 if (!buf) ··· 1378 u64 start = 0; 1379 struct page *page; 1380 struct extent_io_tree *io_tree = NULL; 1381 - struct btrfs_fs_info *info = NULL; 1382 struct bio_vec *bvec; 1383 int i; 1384 int ret; ··· 1396 buf_len = page->private >> 2; 1397 start = page_offset(page) + bvec->bv_offset; 1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1399 - info = BTRFS_I(page->mapping->host)->root->fs_info; 1400 } 1401 /* are we fully contained in this bio? 
*/ 1402 if (buf_len <= length) ··· 1679 1680 init_waitqueue_head(&fs_info->transaction_throttle); 1681 init_waitqueue_head(&fs_info->transaction_wait); 1682 init_waitqueue_head(&fs_info->async_submit_wait); 1683 1684 __setup_root(4096, 4096, 4096, 4096, tree_root, 1685 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1686 - 1687 1688 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1689 if (!bh) ··· 1774 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1775 fs_info->thread_pool_size, 1776 &fs_info->generic_worker); 1777 1778 /* 1779 * endios are largely parallel and should have a very ··· 1796 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1797 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1798 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1799 1800 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1801 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, ··· 1995 if (!(sb->s_flags & MS_RDONLY)) { 1996 down_read(&fs_info->cleanup_work_sem); 1997 btrfs_orphan_cleanup(fs_info->fs_root); 1998 up_read(&fs_info->cleanup_work_sem); 1999 } 2000 ··· 2038 btrfs_stop_workers(&fs_info->endio_meta_workers); 2039 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2040 btrfs_stop_workers(&fs_info->endio_write_workers); 2041 btrfs_stop_workers(&fs_info->submit_workers); 2042 fail_iput: 2043 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); ··· 2414 fs_info->closing = 1; 2415 smp_mb(); 2416 2417 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2418 ret = btrfs_commit_super(root); 2419 if (ret) ··· 2461 btrfs_stop_workers(&fs_info->endio_meta_workers); 2462 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2463 btrfs_stop_workers(&fs_info->endio_write_workers); 2464 btrfs_stop_workers(&fs_info->submit_workers); 2465 2466 btrfs_close_devices(fs_info->fs_devices);
··· 338 struct extent_io_tree *tree; 339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 340 u64 found_start; 341 unsigned long len; 342 struct extent_buffer *eb; 343 int ret; ··· 369 WARN_ON(1); 370 goto err; 371 } 372 csum_tree_block(root, eb, 0); 373 err: 374 free_extent_buffer(eb); ··· 481 end_io_wq->work.flags = 0; 482 483 if (bio->bi_rw & REQ_WRITE) { 484 + if (end_io_wq->metadata == 1) 485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 486 + &end_io_wq->work); 487 + else if (end_io_wq->metadata == 2) 488 + btrfs_queue_worker(&fs_info->endio_freespace_worker, 489 &end_io_wq->work); 490 else 491 btrfs_queue_worker(&fs_info->endio_write_workers, ··· 497 } 498 } 499 500 + /* 501 + * For the metadata arg you want 502 + * 503 + * 0 - if data 504 + * 1 - if normal metadta 505 + * 2 - if writing to the free space cache area 506 + */ 507 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 508 int metadata) 509 { ··· 533 534 static void run_one_async_start(struct btrfs_work *work) 535 { 536 struct async_submit_bio *async; 537 538 async = container_of(work, struct async_submit_bio, work); 539 async->submit_bio_start(async->inode, async->rw, async->bio, 540 async->mirror_num, async->bio_flags, 541 async->bio_offset); ··· 850 u32 blocksize, u64 parent_transid) 851 { 852 struct extent_buffer *buf = NULL; 853 int ret; 854 855 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 856 if (!buf) ··· 1377 u64 start = 0; 1378 struct page *page; 1379 struct extent_io_tree *io_tree = NULL; 1380 struct bio_vec *bvec; 1381 int i; 1382 int ret; ··· 1396 buf_len = page->private >> 2; 1397 start = page_offset(page) + bvec->bv_offset; 1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1399 } 1400 /* are we fully contained in this bio? 
*/ 1401 if (buf_len <= length) ··· 1680 1681 init_waitqueue_head(&fs_info->transaction_throttle); 1682 init_waitqueue_head(&fs_info->transaction_wait); 1683 + init_waitqueue_head(&fs_info->transaction_blocked_wait); 1684 init_waitqueue_head(&fs_info->async_submit_wait); 1685 1686 __setup_root(4096, 4096, 4096, 4096, tree_root, 1687 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1688 1689 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1690 if (!bh) ··· 1775 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1776 fs_info->thread_pool_size, 1777 &fs_info->generic_worker); 1778 + btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", 1779 + 1, &fs_info->generic_worker); 1780 1781 /* 1782 * endios are largely parallel and should have a very ··· 1795 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1796 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1797 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1798 + btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 1799 1800 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1801 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, ··· 1993 if (!(sb->s_flags & MS_RDONLY)) { 1994 down_read(&fs_info->cleanup_work_sem); 1995 btrfs_orphan_cleanup(fs_info->fs_root); 1996 + btrfs_orphan_cleanup(fs_info->tree_root); 1997 up_read(&fs_info->cleanup_work_sem); 1998 } 1999 ··· 2035 btrfs_stop_workers(&fs_info->endio_meta_workers); 2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2037 btrfs_stop_workers(&fs_info->endio_write_workers); 2038 + btrfs_stop_workers(&fs_info->endio_freespace_worker); 2039 btrfs_stop_workers(&fs_info->submit_workers); 2040 fail_iput: 2041 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); ··· 2410 fs_info->closing = 1; 2411 smp_mb(); 2412 2413 + btrfs_put_block_group_cache(fs_info); 2414 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2415 ret = btrfs_commit_super(root); 2416 if (ret) ··· 2456 btrfs_stop_workers(&fs_info->endio_meta_workers); 2457 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2458 btrfs_stop_workers(&fs_info->endio_write_workers); 2459 + btrfs_stop_workers(&fs_info->endio_freespace_worker); 2460 btrfs_stop_workers(&fs_info->submit_workers); 2461 2462 btrfs_close_devices(fs_info->fs_devices);
fs/btrfs/extent-tree.c  +545 -159
··· 242 return NULL; 243 } 244 245 ctl = cache->caching_ctl; 246 atomic_inc(&ctl->count); 247 spin_unlock(&cache->lock); ··· 427 return 0; 428 } 429 430 - static int cache_block_group(struct btrfs_block_group_cache *cache) 431 { 432 struct btrfs_fs_info *fs_info = cache->fs_info; 433 struct btrfs_caching_control *caching_ctl; ··· 438 439 smp_mb(); 440 if (cache->cached != BTRFS_CACHE_NO) 441 return 0; 442 443 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); ··· 547 548 rcu_read_lock(); 549 list_for_each_entry_rcu(found, head, list) { 550 - if (found->flags == flags) { 551 rcu_read_unlock(); 552 return found; 553 } ··· 577 return num; 578 num *= factor; 579 do_div(num, 10); 580 return num; 581 } 582 ··· 2734 return cache; 2735 } 2736 2737 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2738 struct btrfs_root *root) 2739 { ··· 2849 if (!path) 2850 return -ENOMEM; 2851 2852 while (1) { 2853 if (last == 0) { 2854 err = btrfs_run_delayed_refs(trans, root, ··· 2877 2878 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2879 while (cache) { 2880 if (cache->dirty) 2881 break; 2882 cache = next_block_group(root, cache); ··· 2893 continue; 2894 } 2895 2896 cache->dirty = 0; 2897 last = cache->key.objectid + cache->key.offset; 2898 2899 err = write_one_cache_group(trans, root, path, cache); 2900 BUG_ON(err); 2901 btrfs_put_block_group(cache); 2902 } 2903 ··· 2984 if (found) { 2985 spin_lock(&found->lock); 2986 found->total_bytes += total_bytes; 2987 found->bytes_used += bytes_used; 2988 found->disk_used += bytes_used * factor; 2989 found->full = 0; ··· 3004 BTRFS_BLOCK_GROUP_SYSTEM | 3005 BTRFS_BLOCK_GROUP_METADATA); 3006 found->total_bytes = total_bytes; 3007 found->bytes_used = bytes_used; 3008 found->disk_used = bytes_used * factor; 3009 found->bytes_pinned = 0; ··· 3106 struct btrfs_space_info *data_sinfo; 3107 struct btrfs_root *root = BTRFS_I(inode)->root; 3108 u64 used; 3109 - int ret = 0, committed = 0; 3110 3111 /* make sure bytes are sectorsize aligned */ 3112 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3113 3114 data_sinfo = BTRFS_I(inode)->space_info; 3115 if (!data_sinfo) ··· 3134 * if we don't have enough free bytes in this space then we need 3135 * to alloc a new chunk. 
3136 */ 3137 - if (!data_sinfo->full) { 3138 u64 alloc_target; 3139 3140 data_sinfo->force_alloc = 1; ··· 3226 rcu_read_unlock(); 3227 } 3228 3229 - static int should_alloc_chunk(struct btrfs_space_info *sinfo, 3230 - u64 alloc_bytes) 3231 { 3232 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3233 3234 if (sinfo->bytes_used + sinfo->bytes_reserved + 3235 alloc_bytes + 256 * 1024 * 1024 < num_bytes) ··· 3238 3239 if (sinfo->bytes_used + sinfo->bytes_reserved + 3240 alloc_bytes < div_factor(num_bytes, 8)) 3241 return 0; 3242 3243 return 1; ··· 3277 goto out; 3278 } 3279 3280 - if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { 3281 spin_unlock(&space_info->lock); 3282 goto out; 3283 } 3284 spin_unlock(&space_info->lock); 3285 3286 /* 3287 * if we're doing a data chunk, go ahead and make sure that ··· 3316 return ret; 3317 } 3318 3319 - static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, 3320 - struct btrfs_root *root, 3321 - struct btrfs_space_info *sinfo, u64 num_bytes) 3322 - { 3323 - int ret; 3324 - int end_trans = 0; 3325 - 3326 - if (sinfo->full) 3327 - return 0; 3328 - 3329 - spin_lock(&sinfo->lock); 3330 - ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); 3331 - spin_unlock(&sinfo->lock); 3332 - if (!ret) 3333 - return 0; 3334 - 3335 - if (!trans) { 3336 - trans = btrfs_join_transaction(root, 1); 3337 - BUG_ON(IS_ERR(trans)); 3338 - end_trans = 1; 3339 - } 3340 - 3341 - ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3342 - num_bytes + 2 * 1024 * 1024, 3343 - get_alloc_profile(root, sinfo->flags), 0); 3344 - 3345 - if (end_trans) 3346 - btrfs_end_transaction(trans, root); 3347 - 3348 - return ret == 1 ? 1 : 0; 3349 - } 3350 - 3351 /* 3352 * shrink metadata reservation for delalloc 3353 */ 3354 static int shrink_delalloc(struct btrfs_trans_handle *trans, 3355 - struct btrfs_root *root, u64 to_reclaim) 3356 { 3357 struct btrfs_block_rsv *block_rsv; 3358 u64 reserved; 3359 u64 max_reclaim; 3360 u64 reclaimed = 0; 3361 int pause = 1; 3362 - int ret; 3363 3364 block_rsv = &root->fs_info->delalloc_block_rsv; 3365 - spin_lock(&block_rsv->lock); 3366 - reserved = block_rsv->reserved; 3367 - spin_unlock(&block_rsv->lock); 3368 3369 if (reserved == 0) 3370 return 0; ··· 3342 max_reclaim = min(reserved, to_reclaim); 3343 3344 while (1) { 3345 - ret = btrfs_start_one_delalloc_inode(root, trans ? 
1 : 0); 3346 - if (!ret) { 3347 - __set_current_state(TASK_INTERRUPTIBLE); 3348 - schedule_timeout(pause); 3349 - pause <<= 1; 3350 - if (pause > HZ / 10) 3351 - pause = HZ / 10; 3352 - } else { 3353 - pause = 1; 3354 - } 3355 3356 - spin_lock(&block_rsv->lock); 3357 - if (reserved > block_rsv->reserved) 3358 - reclaimed = reserved - block_rsv->reserved; 3359 - reserved = block_rsv->reserved; 3360 - spin_unlock(&block_rsv->lock); 3361 3362 if (reserved == 0 || reclaimed >= max_reclaim) 3363 break; 3364 3365 if (trans && trans->transaction->blocked) 3366 return -EAGAIN; 3367 } 3368 return reclaimed >= to_reclaim; 3369 } 3370 3371 - static int should_retry_reserve(struct btrfs_trans_handle *trans, 3372 - struct btrfs_root *root, 3373 - struct btrfs_block_rsv *block_rsv, 3374 - u64 num_bytes, int *retries) 3375 - { 3376 - struct btrfs_space_info *space_info = block_rsv->space_info; 3377 - int ret; 3378 - 3379 - if ((*retries) > 2) 3380 - return -ENOSPC; 3381 - 3382 - ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); 3383 - if (ret) 3384 - return 1; 3385 - 3386 - if (trans && trans->transaction->in_commit) 3387 - return -ENOSPC; 3388 - 3389 - ret = shrink_delalloc(trans, root, num_bytes); 3390 - if (ret) 3391 - return ret; 3392 - 3393 - spin_lock(&space_info->lock); 3394 - if (space_info->bytes_pinned < num_bytes) 3395 - ret = 1; 3396 - spin_unlock(&space_info->lock); 3397 - if (ret) 3398 - return -ENOSPC; 3399 - 3400 - (*retries)++; 3401 - 3402 - if (trans) 3403 - return -EAGAIN; 3404 - 3405 - trans = btrfs_join_transaction(root, 1); 3406 - BUG_ON(IS_ERR(trans)); 3407 - ret = btrfs_commit_transaction(trans, root); 3408 - BUG_ON(ret); 3409 - 3410 - return 1; 3411 - } 3412 - 3413 - static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, 3414 - u64 num_bytes) 3415 { 3416 struct btrfs_space_info *space_info = block_rsv->space_info; 3417 u64 unused; 3418 - int ret = -ENOSPC; 3419 3420 spin_lock(&space_info->lock); 3421 unused = space_info->bytes_used + space_info->bytes_reserved + 3422 - space_info->bytes_pinned + space_info->bytes_readonly; 3423 3424 - if (unused < space_info->total_bytes) 3425 - unused = space_info->total_bytes - unused; 3426 - else 3427 - unused = 0; 3428 - 3429 - if (unused >= num_bytes) { 3430 - if (block_rsv->priority >= 10) { 3431 - space_info->bytes_reserved += num_bytes; 3432 ret = 0; 3433 } else { 3434 - if ((unused + block_rsv->reserved) * 3435 - block_rsv->priority >= 3436 - (num_bytes + block_rsv->reserved) * 10) { 3437 - space_info->bytes_reserved += num_bytes; 3438 - ret = 0; 3439 - } 3440 } 3441 } 3442 spin_unlock(&space_info->lock); 3443 3444 return ret; 3445 } ··· 3606 { 3607 struct btrfs_block_rsv *block_rsv; 3608 struct btrfs_fs_info *fs_info = root->fs_info; 3609 - u64 alloc_target; 3610 3611 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3612 if (!block_rsv) 3613 return NULL; 3614 3615 btrfs_init_block_rsv(block_rsv); 3616 - 3617 - alloc_target = btrfs_get_alloc_profile(root, 0); 3618 block_rsv->space_info = __find_space_info(fs_info, 3619 BTRFS_BLOCK_GROUP_METADATA); 3620 - 3621 return block_rsv; 3622 } 3623 ··· 3644 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3645 struct btrfs_root *root, 3646 struct btrfs_block_rsv *block_rsv, 3647 - u64 num_bytes, int *retries) 3648 { 3649 int ret; 3650 3651 if (num_bytes == 0) 3652 return 0; 3653 - again: 3654 - ret = reserve_metadata_bytes(block_rsv, num_bytes); 3655 if (!ret) { 3656 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3657 return 0; 3658 } 3659 - 3660 - ret = 
should_retry_reserve(trans, root, block_rsv, num_bytes, retries); 3661 - if (ret > 0) 3662 - goto again; 3663 3664 return ret; 3665 } ··· 3691 return 0; 3692 3693 if (block_rsv->refill_used) { 3694 - ret = reserve_metadata_bytes(block_rsv, num_bytes); 3695 if (!ret) { 3696 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3697 return 0; ··· 3771 3772 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3773 spin_lock(&sinfo->lock); 3774 meta_used = sinfo->bytes_used; 3775 spin_unlock(&sinfo->lock); 3776 ··· 3800 block_rsv->size = num_bytes; 3801 3802 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3803 - sinfo->bytes_reserved + sinfo->bytes_readonly; 3804 3805 if (sinfo->total_bytes > num_bytes) { 3806 num_bytes = sinfo->total_bytes - num_bytes; ··· 3872 3873 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3874 struct btrfs_root *root, 3875 - int num_items, int *retries) 3876 { 3877 u64 num_bytes; 3878 int ret; ··· 3882 3883 num_bytes = calc_trans_metadata_size(root, num_items); 3884 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3885 - num_bytes, retries); 3886 if (!ret) { 3887 trans->bytes_reserved += num_bytes; 3888 trans->block_rsv = &root->fs_info->trans_block_rsv; ··· 3956 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3957 u64 to_reserve; 3958 int nr_extents; 3959 - int retries = 0; 3960 int ret; 3961 3962 if (btrfs_transaction_in_commit(root->fs_info)) 3963 schedule_timeout(1); 3964 3965 num_bytes = ALIGN(num_bytes, root->sectorsize); 3966 - again: 3967 spin_lock(&BTRFS_I(inode)->accounting_lock); 3968 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3969 if (nr_extents > BTRFS_I(inode)->reserved_extents) { ··· 3972 nr_extents = 0; 3973 to_reserve = 0; 3974 } 3975 3976 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3977 - ret = reserve_metadata_bytes(block_rsv, to_reserve); 3978 - if (ret) { 3979 - spin_unlock(&BTRFS_I(inode)->accounting_lock); 3980 - ret = should_retry_reserve(NULL, root, block_rsv, to_reserve, 3981 - &retries); 3982 - if (ret > 0) 3983 - goto again; 3984 return ret; 3985 - } 3986 3987 BTRFS_I(inode)->reserved_extents += nr_extents; 3988 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3989 spin_unlock(&BTRFS_I(inode)->accounting_lock); ··· 3987 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3988 3989 if (block_rsv->size > 512 * 1024 * 1024) 3990 - shrink_delalloc(NULL, root, to_reserve); 3991 3992 return 0; 3993 } ··· 4046 struct btrfs_root *root, 4047 u64 bytenr, u64 num_bytes, int alloc) 4048 { 4049 - struct btrfs_block_group_cache *cache; 4050 struct btrfs_fs_info *info = root->fs_info; 4051 - int factor; 4052 u64 total = num_bytes; 4053 u64 old_val; 4054 u64 byte_in_group; 4055 4056 /* block accounting for super block */ 4057 spin_lock(&info->delalloc_lock); ··· 4073 factor = 2; 4074 else 4075 factor = 1; 4076 byte_in_group = bytenr - cache->key.objectid; 4077 WARN_ON(byte_in_group > cache->key.offset); 4078 4079 spin_lock(&cache->space_info->lock); 4080 spin_lock(&cache->lock); 4081 cache->dirty = 1; 4082 old_val = btrfs_block_group_used(&cache->item); 4083 num_bytes = min(total, cache->key.offset - byte_in_group); ··· 4838 bool found_uncached_bg = false; 4839 bool failed_cluster_refill = false; 4840 bool failed_alloc = false; 4841 u64 ideal_cache_percent = 0; 4842 u64 ideal_cache_offset = 0; 4843 ··· 4853 return -ENOSPC; 4854 } 4855 4856 if (orig_root->ref_cows || empty_size) 4857 allowed_chunk_alloc = 1; 4858 4859 - if (data & 
BTRFS_BLOCK_GROUP_METADATA) { 4860 last_ptr = &root->fs_info->meta_alloc_cluster; 4861 if (!btrfs_test_opt(root, SSD)) 4862 empty_cluster = 64 * 1024; 4863 } 4864 4865 - if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4866 last_ptr = &root->fs_info->data_alloc_cluster; 4867 } 4868 ··· 4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4935 u64 free_percent; 4936 4937 free_percent = btrfs_block_group_used(&block_group->item); 4938 free_percent *= 100; 4939 free_percent = div64_u64(free_percent, ··· 4958 if (loop > LOOP_CACHING_NOWAIT || 4959 (loop > LOOP_FIND_IDEAL && 4960 atomic_read(&space_info->caching_threads) < 2)) { 4961 - ret = cache_block_group(block_group); 4962 BUG_ON(ret); 4963 } 4964 found_uncached_bg = true; ··· 5515 u64 num_bytes = ins->offset; 5516 5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5518 - cache_block_group(block_group); 5519 caching_ctl = get_caching_control(block_group); 5520 5521 if (!caching_ctl) { ··· 5605 block_rsv = get_block_rsv(trans, root); 5606 5607 if (block_rsv->size == 0) { 5608 - ret = reserve_metadata_bytes(block_rsv, blocksize); 5609 if (ret) 5610 return ERR_PTR(ret); 5611 return block_rsv; ··· 5615 ret = block_rsv_use_bytes(block_rsv, blocksize); 5616 if (!ret) 5617 return block_rsv; 5618 - 5619 - WARN_ON(1); 5620 - printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", 5621 - block_rsv->size, block_rsv->reserved, 5622 - block_rsv->freed[0], block_rsv->freed[1]); 5623 5624 return ERR_PTR(-ENOSPC); 5625 } ··· 5714 u64 generation; 5715 u64 refs; 5716 u64 flags; 5717 - u64 last = 0; 5718 u32 nritems; 5719 u32 blocksize; 5720 struct btrfs_key key; ··· 5781 generation); 5782 if (ret) 5783 break; 5784 - last = bytenr + blocksize; 5785 nread++; 5786 } 5787 wc->reada_slot = slot; ··· 8104 return ret; 8105 } 8106 8107 int btrfs_free_block_groups(struct btrfs_fs_info *info) 8108 { 8109 struct btrfs_block_group_cache *block_group; ··· 8221 struct btrfs_key key; 8222 struct btrfs_key found_key; 8223 struct extent_buffer *leaf; 8224 8225 root = info->extent_root; 8226 key.objectid = 0; ··· 8231 path = btrfs_alloc_path(); 8232 if (!path) 8233 return -ENOMEM; 8234 8235 while (1) { 8236 ret = find_first_block_group(root, path, &key); ··· 8262 cache->fs_info = info; 8263 INIT_LIST_HEAD(&cache->list); 8264 INIT_LIST_HEAD(&cache->cluster_list); 8265 8266 /* 8267 * we only want to have 32k of ram per block group for keeping ··· 8370 cache->key.offset = size; 8371 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 8372 cache->sectorsize = root->sectorsize; 8373 8374 /* 8375 * we only want to have 32k of ram per block group for keeping track ··· 8427 struct btrfs_path *path; 8428 struct btrfs_block_group_cache *block_group; 8429 struct btrfs_free_cluster *cluster; 8430 struct btrfs_key key; 8431 int ret; 8432 8433 root = root->fs_info->extent_root; 8434 ··· 8440 BUG_ON(!block_group->ro); 8441 8442 memcpy(&key, &block_group->key, sizeof(key)); 8443 8444 /* make sure this block group isn't part of an allocation cluster */ 8445 cluster = &root->fs_info->data_alloc_cluster; ··· 8464 8465 path = btrfs_alloc_path(); 8466 BUG_ON(!path); 8467 8468 spin_lock(&root->fs_info->block_group_cache_lock); 8469 rb_erase(&block_group->cache_node, ··· 8520 spin_lock(&block_group->space_info->lock); 8521 block_group->space_info->total_bytes -= block_group->key.offset; 8522 block_group->space_info->bytes_readonly -= block_group->key.offset; 8523 spin_unlock(&block_group->space_info->lock); 8524 8525 
btrfs_clear_space_info_full(root->fs_info); 8526
··· 242 return NULL; 243 } 244 245 + /* We're loading it the fast way, so we don't have a caching_ctl. */ 246 + if (!cache->caching_ctl) { 247 + spin_unlock(&cache->lock); 248 + return NULL; 249 + } 250 + 251 ctl = cache->caching_ctl; 252 atomic_inc(&ctl->count); 253 spin_unlock(&cache->lock); ··· 421 return 0; 422 } 423 424 + static int cache_block_group(struct btrfs_block_group_cache *cache, 425 + struct btrfs_trans_handle *trans, 426 + int load_cache_only) 427 { 428 struct btrfs_fs_info *fs_info = cache->fs_info; 429 struct btrfs_caching_control *caching_ctl; ··· 430 431 smp_mb(); 432 if (cache->cached != BTRFS_CACHE_NO) 433 + return 0; 434 + 435 + /* 436 + * We can't do the read from on-disk cache during a commit since we need 437 + * to have the normal tree locking. 438 + */ 439 + if (!trans->transaction->in_commit) { 440 + spin_lock(&cache->lock); 441 + if (cache->cached != BTRFS_CACHE_NO) { 442 + spin_unlock(&cache->lock); 443 + return 0; 444 + } 445 + cache->cached = BTRFS_CACHE_STARTED; 446 + spin_unlock(&cache->lock); 447 + 448 + ret = load_free_space_cache(fs_info, cache); 449 + 450 + spin_lock(&cache->lock); 451 + if (ret == 1) { 452 + cache->cached = BTRFS_CACHE_FINISHED; 453 + cache->last_byte_to_unpin = (u64)-1; 454 + } else { 455 + cache->cached = BTRFS_CACHE_NO; 456 + } 457 + spin_unlock(&cache->lock); 458 + if (ret == 1) 459 + return 0; 460 + } 461 + 462 + if (load_cache_only) 463 return 0; 464 465 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); ··· 509 510 rcu_read_lock(); 511 list_for_each_entry_rcu(found, head, list) { 512 + if (found->flags & flags) { 513 rcu_read_unlock(); 514 return found; 515 } ··· 539 return num; 540 num *= factor; 541 do_div(num, 10); 542 + return num; 543 + } 544 + 545 + static u64 div_factor_fine(u64 num, int factor) 546 + { 547 + if (factor == 100) 548 + return num; 549 + num *= factor; 550 + do_div(num, 100); 551 return num; 552 } 553 ··· 2687 return cache; 2688 } 2689 2690 + static int cache_save_setup(struct btrfs_block_group_cache *block_group, 2691 + struct btrfs_trans_handle *trans, 2692 + struct btrfs_path *path) 2693 + { 2694 + struct btrfs_root *root = block_group->fs_info->tree_root; 2695 + struct inode *inode = NULL; 2696 + u64 alloc_hint = 0; 2697 + int num_pages = 0; 2698 + int retries = 0; 2699 + int ret = 0; 2700 + 2701 + /* 2702 + * If this block group is smaller than 100 megs don't bother caching the 2703 + * block group. 2704 + */ 2705 + if (block_group->key.offset < (100 * 1024 * 1024)) { 2706 + spin_lock(&block_group->lock); 2707 + block_group->disk_cache_state = BTRFS_DC_WRITTEN; 2708 + spin_unlock(&block_group->lock); 2709 + return 0; 2710 + } 2711 + 2712 + again: 2713 + inode = lookup_free_space_inode(root, block_group, path); 2714 + if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2715 + ret = PTR_ERR(inode); 2716 + btrfs_release_path(root, path); 2717 + goto out; 2718 + } 2719 + 2720 + if (IS_ERR(inode)) { 2721 + BUG_ON(retries); 2722 + retries++; 2723 + 2724 + if (block_group->ro) 2725 + goto out_free; 2726 + 2727 + ret = create_free_space_inode(root, trans, block_group, path); 2728 + if (ret) 2729 + goto out_free; 2730 + goto again; 2731 + } 2732 + 2733 + /* 2734 + * We want to set the generation to 0, that way if anything goes wrong 2735 + * from here on out we know not to trust this cache when we load up next 2736 + * time. 
2737 + */ 2738 + BTRFS_I(inode)->generation = 0; 2739 + ret = btrfs_update_inode(trans, root, inode); 2740 + WARN_ON(ret); 2741 + 2742 + if (i_size_read(inode) > 0) { 2743 + ret = btrfs_truncate_free_space_cache(root, trans, path, 2744 + inode); 2745 + if (ret) 2746 + goto out_put; 2747 + } 2748 + 2749 + spin_lock(&block_group->lock); 2750 + if (block_group->cached != BTRFS_CACHE_FINISHED) { 2751 + spin_unlock(&block_group->lock); 2752 + goto out_put; 2753 + } 2754 + spin_unlock(&block_group->lock); 2755 + 2756 + num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 2757 + if (!num_pages) 2758 + num_pages = 1; 2759 + 2760 + /* 2761 + * Just to make absolutely sure we have enough space, we're going to 2762 + * preallocate 12 pages worth of space for each block group. In 2763 + * practice we ought to use at most 8, but we need extra space so we can 2764 + * add our header and have a terminator between the extents and the 2765 + * bitmaps. 2766 + */ 2767 + num_pages *= 16; 2768 + num_pages *= PAGE_CACHE_SIZE; 2769 + 2770 + ret = btrfs_check_data_free_space(inode, num_pages); 2771 + if (ret) 2772 + goto out_put; 2773 + 2774 + ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2775 + num_pages, num_pages, 2776 + &alloc_hint); 2777 + btrfs_free_reserved_data_space(inode, num_pages); 2778 + out_put: 2779 + iput(inode); 2780 + out_free: 2781 + btrfs_release_path(root, path); 2782 + out: 2783 + spin_lock(&block_group->lock); 2784 + if (ret) 2785 + block_group->disk_cache_state = BTRFS_DC_ERROR; 2786 + else 2787 + block_group->disk_cache_state = BTRFS_DC_SETUP; 2788 + spin_unlock(&block_group->lock); 2789 + 2790 + return ret; 2791 + } 2792 + 2793 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2794 struct btrfs_root *root) 2795 { ··· 2699 if (!path) 2700 return -ENOMEM; 2701 2702 + again: 2703 + while (1) { 2704 + cache = btrfs_lookup_first_block_group(root->fs_info, last); 2705 + while (cache) { 2706 + if (cache->disk_cache_state == BTRFS_DC_CLEAR) 2707 + break; 2708 + cache = next_block_group(root, cache); 2709 + } 2710 + if (!cache) { 2711 + if (last == 0) 2712 + break; 2713 + last = 0; 2714 + continue; 2715 + } 2716 + err = cache_save_setup(cache, trans, path); 2717 + last = cache->key.objectid + cache->key.offset; 2718 + btrfs_put_block_group(cache); 2719 + } 2720 + 2721 while (1) { 2722 if (last == 0) { 2723 err = btrfs_run_delayed_refs(trans, root, ··· 2708 2709 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2710 while (cache) { 2711 + if (cache->disk_cache_state == BTRFS_DC_CLEAR) { 2712 + btrfs_put_block_group(cache); 2713 + goto again; 2714 + } 2715 + 2716 if (cache->dirty) 2717 break; 2718 cache = next_block_group(root, cache); ··· 2719 continue; 2720 } 2721 2722 + if (cache->disk_cache_state == BTRFS_DC_SETUP) 2723 + cache->disk_cache_state = BTRFS_DC_NEED_WRITE; 2724 cache->dirty = 0; 2725 last = cache->key.objectid + cache->key.offset; 2726 2727 err = write_one_cache_group(trans, root, path, cache); 2728 BUG_ON(err); 2729 + btrfs_put_block_group(cache); 2730 + } 2731 + 2732 + while (1) { 2733 + /* 2734 + * I don't think this is needed since we're just marking our 2735 + * preallocated extent as written, but just in case it can't 2736 + * hurt. 
2737 + */ 2738 + if (last == 0) { 2739 + err = btrfs_run_delayed_refs(trans, root, 2740 + (unsigned long)-1); 2741 + BUG_ON(err); 2742 + } 2743 + 2744 + cache = btrfs_lookup_first_block_group(root->fs_info, last); 2745 + while (cache) { 2746 + /* 2747 + * Really this shouldn't happen, but it could if we 2748 + * couldn't write the entire preallocated extent and 2749 + * splitting the extent resulted in a new block. 2750 + */ 2751 + if (cache->dirty) { 2752 + btrfs_put_block_group(cache); 2753 + goto again; 2754 + } 2755 + if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2756 + break; 2757 + cache = next_block_group(root, cache); 2758 + } 2759 + if (!cache) { 2760 + if (last == 0) 2761 + break; 2762 + last = 0; 2763 + continue; 2764 + } 2765 + 2766 + btrfs_write_out_cache(root, trans, cache, path); 2767 + 2768 + /* 2769 + * If we didn't have an error then the cache state is still 2770 + * NEED_WRITE, so we can set it to WRITTEN. 2771 + */ 2772 + if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2773 + cache->disk_cache_state = BTRFS_DC_WRITTEN; 2774 + last = cache->key.objectid + cache->key.offset; 2775 btrfs_put_block_group(cache); 2776 } 2777 ··· 2762 if (found) { 2763 spin_lock(&found->lock); 2764 found->total_bytes += total_bytes; 2765 + found->disk_total += total_bytes * factor; 2766 found->bytes_used += bytes_used; 2767 found->disk_used += bytes_used * factor; 2768 found->full = 0; ··· 2781 BTRFS_BLOCK_GROUP_SYSTEM | 2782 BTRFS_BLOCK_GROUP_METADATA); 2783 found->total_bytes = total_bytes; 2784 + found->disk_total = total_bytes * factor; 2785 found->bytes_used = bytes_used; 2786 found->disk_used = bytes_used * factor; 2787 found->bytes_pinned = 0; ··· 2882 struct btrfs_space_info *data_sinfo; 2883 struct btrfs_root *root = BTRFS_I(inode)->root; 2884 u64 used; 2885 + int ret = 0, committed = 0, alloc_chunk = 1; 2886 2887 /* make sure bytes are sectorsize aligned */ 2888 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2889 + 2890 + if (root == root->fs_info->tree_root) { 2891 + alloc_chunk = 0; 2892 + committed = 1; 2893 + } 2894 2895 data_sinfo = BTRFS_I(inode)->space_info; 2896 if (!data_sinfo) ··· 2905 * if we don't have enough free bytes in this space then we need 2906 * to alloc a new chunk. 2907 */ 2908 + if (!data_sinfo->full && alloc_chunk) { 2909 u64 alloc_target; 2910 2911 data_sinfo->force_alloc = 1; ··· 2997 rcu_read_unlock(); 2998 } 2999 3000 + static int should_alloc_chunk(struct btrfs_root *root, 3001 + struct btrfs_space_info *sinfo, u64 alloc_bytes) 3002 { 3003 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3004 + u64 thresh; 3005 3006 if (sinfo->bytes_used + sinfo->bytes_reserved + 3007 alloc_bytes + 256 * 1024 * 1024 < num_bytes) ··· 3008 3009 if (sinfo->bytes_used + sinfo->bytes_reserved + 3010 alloc_bytes < div_factor(num_bytes, 8)) 3011 + return 0; 3012 + 3013 + thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3014 + thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3015 + 3016 + if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3017 return 0; 3018 3019 return 1; ··· 3041 goto out; 3042 } 3043 3044 + if (!force && !should_alloc_chunk(extent_root, space_info, 3045 + alloc_bytes)) { 3046 spin_unlock(&space_info->lock); 3047 goto out; 3048 } 3049 spin_unlock(&space_info->lock); 3050 + 3051 + /* 3052 + * If we have mixed data/metadata chunks we want to make sure we keep 3053 + * allocating mixed chunks instead of individual chunks. 
3054 + */ 3055 + if (btrfs_mixed_space_info(space_info)) 3056 + flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); 3057 3058 /* 3059 * if we're doing a data chunk, go ahead and make sure that ··· 3072 return ret; 3073 } 3074 3075 /* 3076 * shrink metadata reservation for delalloc 3077 */ 3078 static int shrink_delalloc(struct btrfs_trans_handle *trans, 3079 + struct btrfs_root *root, u64 to_reclaim, int sync) 3080 { 3081 struct btrfs_block_rsv *block_rsv; 3082 + struct btrfs_space_info *space_info; 3083 u64 reserved; 3084 u64 max_reclaim; 3085 u64 reclaimed = 0; 3086 int pause = 1; 3087 + int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3088 3089 block_rsv = &root->fs_info->delalloc_block_rsv; 3090 + space_info = block_rsv->space_info; 3091 + 3092 + smp_mb(); 3093 + reserved = space_info->bytes_reserved; 3094 3095 if (reserved == 0) 3096 return 0; ··· 3128 max_reclaim = min(reserved, to_reclaim); 3129 3130 while (1) { 3131 + /* have the flusher threads jump in and do some IO */ 3132 + smp_mb(); 3133 + nr_pages = min_t(unsigned long, nr_pages, 3134 + root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); 3135 + writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3136 3137 + spin_lock(&space_info->lock); 3138 + if (reserved > space_info->bytes_reserved) 3139 + reclaimed += reserved - space_info->bytes_reserved; 3140 + reserved = space_info->bytes_reserved; 3141 + spin_unlock(&space_info->lock); 3142 3143 if (reserved == 0 || reclaimed >= max_reclaim) 3144 break; 3145 3146 if (trans && trans->transaction->blocked) 3147 return -EAGAIN; 3148 + 3149 + __set_current_state(TASK_INTERRUPTIBLE); 3150 + schedule_timeout(pause); 3151 + pause <<= 1; 3152 + if (pause > HZ / 10) 3153 + pause = HZ / 10; 3154 + 3155 } 3156 return reclaimed >= to_reclaim; 3157 } 3158 3159 + /* 3160 + * Retries tells us how many times we've called reserve_metadata_bytes. The 3161 + * idea is if this is the first call (retries == 0) then we will add to our 3162 + * reserved count if we can't make the allocation in order to hold our place 3163 + * while we go and try and free up space. That way for retries > 1 we don't try 3164 + * and add space, we just check to see if the amount of unused space is >= the 3165 + * total space, meaning that our reservation is valid. 3166 + * 3167 + * However if we don't intend to retry this reservation, pass -1 as retries so 3168 + * that it short circuits this logic. 3169 + */ 3170 + static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, 3171 + struct btrfs_root *root, 3172 + struct btrfs_block_rsv *block_rsv, 3173 + u64 orig_bytes, int flush) 3174 { 3175 struct btrfs_space_info *space_info = block_rsv->space_info; 3176 u64 unused; 3177 + u64 num_bytes = orig_bytes; 3178 + int retries = 0; 3179 + int ret = 0; 3180 + bool reserved = false; 3181 + bool committed = false; 3182 + 3183 + again: 3184 + ret = -ENOSPC; 3185 + if (reserved) 3186 + num_bytes = 0; 3187 3188 spin_lock(&space_info->lock); 3189 unused = space_info->bytes_used + space_info->bytes_reserved + 3190 + space_info->bytes_pinned + space_info->bytes_readonly + 3191 + space_info->bytes_may_use; 3192 3193 + /* 3194 + * The idea here is that we've not already over-reserved the block group 3195 + * then we can go ahead and save our reservation first and then start 3196 + * flushing if we need to. Otherwise if we've already overcommitted 3197 + * lets start flushing stuff first and then come back and try to make 3198 + * our reservation. 
3199 + */ 3200 + if (unused <= space_info->total_bytes) { 3201 + unused -= space_info->total_bytes; 3202 + if (unused >= num_bytes) { 3203 + if (!reserved) 3204 + space_info->bytes_reserved += orig_bytes; 3205 ret = 0; 3206 } else { 3207 + /* 3208 + * Ok set num_bytes to orig_bytes since we aren't 3209 + * overocmmitted, this way we only try and reclaim what 3210 + * we need. 3211 + */ 3212 + num_bytes = orig_bytes; 3213 } 3214 + } else { 3215 + /* 3216 + * Ok we're over committed, set num_bytes to the overcommitted 3217 + * amount plus the amount of bytes that we need for this 3218 + * reservation. 3219 + */ 3220 + num_bytes = unused - space_info->total_bytes + 3221 + (orig_bytes * (retries + 1)); 3222 } 3223 + 3224 + /* 3225 + * Couldn't make our reservation, save our place so while we're trying 3226 + * to reclaim space we can actually use it instead of somebody else 3227 + * stealing it from us. 3228 + */ 3229 + if (ret && !reserved) { 3230 + space_info->bytes_reserved += orig_bytes; 3231 + reserved = true; 3232 + } 3233 + 3234 spin_unlock(&space_info->lock); 3235 + 3236 + if (!ret) 3237 + return 0; 3238 + 3239 + if (!flush) 3240 + goto out; 3241 + 3242 + /* 3243 + * We do synchronous shrinking since we don't actually unreserve 3244 + * metadata until after the IO is completed. 3245 + */ 3246 + ret = shrink_delalloc(trans, root, num_bytes, 1); 3247 + if (ret > 0) 3248 + return 0; 3249 + else if (ret < 0) 3250 + goto out; 3251 + 3252 + /* 3253 + * So if we were overcommitted it's possible that somebody else flushed 3254 + * out enough space and we simply didn't have enough space to reclaim, 3255 + * so go back around and try again. 3256 + */ 3257 + if (retries < 2) { 3258 + retries++; 3259 + goto again; 3260 + } 3261 + 3262 + spin_lock(&space_info->lock); 3263 + /* 3264 + * Not enough space to be reclaimed, don't bother committing the 3265 + * transaction. 
3266 + */ 3267 + if (space_info->bytes_pinned < orig_bytes) 3268 + ret = -ENOSPC; 3269 + spin_unlock(&space_info->lock); 3270 + if (ret) 3271 + goto out; 3272 + 3273 + ret = -EAGAIN; 3274 + if (trans || committed) 3275 + goto out; 3276 + 3277 + ret = -ENOSPC; 3278 + trans = btrfs_join_transaction(root, 1); 3279 + if (IS_ERR(trans)) 3280 + goto out; 3281 + ret = btrfs_commit_transaction(trans, root); 3282 + if (!ret) { 3283 + trans = NULL; 3284 + committed = true; 3285 + goto again; 3286 + } 3287 + 3288 + out: 3289 + if (reserved) { 3290 + spin_lock(&space_info->lock); 3291 + space_info->bytes_reserved -= orig_bytes; 3292 + spin_unlock(&space_info->lock); 3293 + } 3294 3295 return ret; 3296 } ··· 3327 { 3328 struct btrfs_block_rsv *block_rsv; 3329 struct btrfs_fs_info *fs_info = root->fs_info; 3330 3331 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3332 if (!block_rsv) 3333 return NULL; 3334 3335 btrfs_init_block_rsv(block_rsv); 3336 block_rsv->space_info = __find_space_info(fs_info, 3337 BTRFS_BLOCK_GROUP_METADATA); 3338 return block_rsv; 3339 } 3340 ··· 3369 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3370 struct btrfs_root *root, 3371 struct btrfs_block_rsv *block_rsv, 3372 + u64 num_bytes) 3373 { 3374 int ret; 3375 3376 if (num_bytes == 0) 3377 return 0; 3378 + 3379 + ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3380 if (!ret) { 3381 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3382 return 0; 3383 } 3384 3385 return ret; 3386 } ··· 3420 return 0; 3421 3422 if (block_rsv->refill_used) { 3423 + ret = reserve_metadata_bytes(trans, root, block_rsv, 3424 + num_bytes, 0); 3425 if (!ret) { 3426 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3427 return 0; ··· 3499 3500 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3501 spin_lock(&sinfo->lock); 3502 + if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) 3503 + data_used = 0; 3504 meta_used = sinfo->bytes_used; 3505 spin_unlock(&sinfo->lock); 3506 ··· 3526 block_rsv->size = num_bytes; 3527 3528 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3529 + sinfo->bytes_reserved + sinfo->bytes_readonly + 3530 + sinfo->bytes_may_use; 3531 3532 if (sinfo->total_bytes > num_bytes) { 3533 num_bytes = sinfo->total_bytes - num_bytes; ··· 3597 3598 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3599 struct btrfs_root *root, 3600 + int num_items) 3601 { 3602 u64 num_bytes; 3603 int ret; ··· 3607 3608 num_bytes = calc_trans_metadata_size(root, num_items); 3609 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3610 + num_bytes); 3611 if (!ret) { 3612 trans->bytes_reserved += num_bytes; 3613 trans->block_rsv = &root->fs_info->trans_block_rsv; ··· 3681 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3682 u64 to_reserve; 3683 int nr_extents; 3684 int ret; 3685 3686 if (btrfs_transaction_in_commit(root->fs_info)) 3687 schedule_timeout(1); 3688 3689 num_bytes = ALIGN(num_bytes, root->sectorsize); 3690 + 3691 spin_lock(&BTRFS_I(inode)->accounting_lock); 3692 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3693 if (nr_extents > BTRFS_I(inode)->reserved_extents) { ··· 3698 nr_extents = 0; 3699 to_reserve = 0; 3700 } 3701 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 3702 3703 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3704 + ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 3705 + if (ret) 3706 return ret; 3707 3708 + spin_lock(&BTRFS_I(inode)->accounting_lock); 3709 
BTRFS_I(inode)->reserved_extents += nr_extents; 3710 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3711 spin_unlock(&BTRFS_I(inode)->accounting_lock); ··· 3717 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3718 3719 if (block_rsv->size > 512 * 1024 * 1024) 3720 + shrink_delalloc(NULL, root, to_reserve, 0); 3721 3722 return 0; 3723 } ··· 3776 struct btrfs_root *root, 3777 u64 bytenr, u64 num_bytes, int alloc) 3778 { 3779 + struct btrfs_block_group_cache *cache = NULL; 3780 struct btrfs_fs_info *info = root->fs_info; 3781 u64 total = num_bytes; 3782 u64 old_val; 3783 u64 byte_in_group; 3784 + int factor; 3785 3786 /* block accounting for super block */ 3787 spin_lock(&info->delalloc_lock); ··· 3803 factor = 2; 3804 else 3805 factor = 1; 3806 + /* 3807 + * If this block group has free space cache written out, we 3808 + * need to make sure to load it if we are removing space. This 3809 + * is because we need the unpinning stage to actually add the 3810 + * space back to the block group, otherwise we will leak space. 3811 + */ 3812 + if (!alloc && cache->cached == BTRFS_CACHE_NO) 3813 + cache_block_group(cache, trans, 1); 3814 + 3815 byte_in_group = bytenr - cache->key.objectid; 3816 WARN_ON(byte_in_group > cache->key.offset); 3817 3818 spin_lock(&cache->space_info->lock); 3819 spin_lock(&cache->lock); 3820 + 3821 + if (btrfs_super_cache_generation(&info->super_copy) != 0 && 3822 + cache->disk_cache_state < BTRFS_DC_CLEAR) 3823 + cache->disk_cache_state = BTRFS_DC_CLEAR; 3824 + 3825 cache->dirty = 1; 3826 old_val = btrfs_block_group_used(&cache->item); 3827 num_bytes = min(total, cache->key.offset - byte_in_group); ··· 4554 bool found_uncached_bg = false; 4555 bool failed_cluster_refill = false; 4556 bool failed_alloc = false; 4557 + bool use_cluster = true; 4558 u64 ideal_cache_percent = 0; 4559 u64 ideal_cache_offset = 0; 4560 ··· 4568 return -ENOSPC; 4569 } 4570 4571 + /* 4572 + * If the space info is for both data and metadata it means we have a 4573 + * small filesystem and we can't use the clustering stuff. 
4574 + */ 4575 + if (btrfs_mixed_space_info(space_info)) 4576 + use_cluster = false; 4577 + 4578 if (orig_root->ref_cows || empty_size) 4579 allowed_chunk_alloc = 1; 4580 4581 + if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 4582 last_ptr = &root->fs_info->meta_alloc_cluster; 4583 if (!btrfs_test_opt(root, SSD)) 4584 empty_cluster = 64 * 1024; 4585 } 4586 4587 + if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 4588 + btrfs_test_opt(root, SSD)) { 4589 last_ptr = &root->fs_info->data_alloc_cluster; 4590 } 4591 ··· 4641 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4642 u64 free_percent; 4643 4644 + ret = cache_block_group(block_group, trans, 1); 4645 + if (block_group->cached == BTRFS_CACHE_FINISHED) 4646 + goto have_block_group; 4647 + 4648 free_percent = btrfs_block_group_used(&block_group->item); 4649 free_percent *= 100; 4650 free_percent = div64_u64(free_percent, ··· 4661 if (loop > LOOP_CACHING_NOWAIT || 4662 (loop > LOOP_FIND_IDEAL && 4663 atomic_read(&space_info->caching_threads) < 2)) { 4664 + ret = cache_block_group(block_group, trans, 0); 4665 BUG_ON(ret); 4666 } 4667 found_uncached_bg = true; ··· 5218 u64 num_bytes = ins->offset; 5219 5220 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5221 + cache_block_group(block_group, trans, 0); 5222 caching_ctl = get_caching_control(block_group); 5223 5224 if (!caching_ctl) { ··· 5308 block_rsv = get_block_rsv(trans, root); 5309 5310 if (block_rsv->size == 0) { 5311 + ret = reserve_metadata_bytes(trans, root, block_rsv, 5312 + blocksize, 0); 5313 if (ret) 5314 return ERR_PTR(ret); 5315 return block_rsv; ··· 5317 ret = block_rsv_use_bytes(block_rsv, blocksize); 5318 if (!ret) 5319 return block_rsv; 5320 5321 return ERR_PTR(-ENOSPC); 5322 } ··· 5421 u64 generation; 5422 u64 refs; 5423 u64 flags; 5424 u32 nritems; 5425 u32 blocksize; 5426 struct btrfs_key key; ··· 5489 generation); 5490 if (ret) 5491 break; 5492 nread++; 5493 } 5494 wc->reada_slot = slot; ··· 7813 return ret; 7814 } 7815 7816 + void btrfs_put_block_group_cache(struct btrfs_fs_info *info) 7817 + { 7818 + struct btrfs_block_group_cache *block_group; 7819 + u64 last = 0; 7820 + 7821 + while (1) { 7822 + struct inode *inode; 7823 + 7824 + block_group = btrfs_lookup_first_block_group(info, last); 7825 + while (block_group) { 7826 + spin_lock(&block_group->lock); 7827 + if (block_group->iref) 7828 + break; 7829 + spin_unlock(&block_group->lock); 7830 + block_group = next_block_group(info->tree_root, 7831 + block_group); 7832 + } 7833 + if (!block_group) { 7834 + if (last == 0) 7835 + break; 7836 + last = 0; 7837 + continue; 7838 + } 7839 + 7840 + inode = block_group->inode; 7841 + block_group->iref = 0; 7842 + block_group->inode = NULL; 7843 + spin_unlock(&block_group->lock); 7844 + iput(inode); 7845 + last = block_group->key.objectid + block_group->key.offset; 7846 + btrfs_put_block_group(block_group); 7847 + } 7848 + } 7849 + 7850 int btrfs_free_block_groups(struct btrfs_fs_info *info) 7851 { 7852 struct btrfs_block_group_cache *block_group; ··· 7896 struct btrfs_key key; 7897 struct btrfs_key found_key; 7898 struct extent_buffer *leaf; 7899 + int need_clear = 0; 7900 + u64 cache_gen; 7901 7902 root = info->extent_root; 7903 key.objectid = 0; ··· 7904 path = btrfs_alloc_path(); 7905 if (!path) 7906 return -ENOMEM; 7907 + 7908 + cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 7909 + if (cache_gen != 0 && 7910 + btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 7911 + need_clear = 1; 7912 + if 
(btrfs_test_opt(root, CLEAR_CACHE)) 7913 + need_clear = 1; 7914 + if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) 7915 + printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 7916 7917 while (1) { 7918 ret = find_first_block_group(root, path, &key); ··· 7926 cache->fs_info = info; 7927 INIT_LIST_HEAD(&cache->list); 7928 INIT_LIST_HEAD(&cache->cluster_list); 7929 + 7930 + if (need_clear) 7931 + cache->disk_cache_state = BTRFS_DC_CLEAR; 7932 7933 /* 7934 * we only want to have 32k of ram per block group for keeping ··· 8031 cache->key.offset = size; 8032 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 8033 cache->sectorsize = root->sectorsize; 8034 + cache->fs_info = root->fs_info; 8035 8036 /* 8037 * we only want to have 32k of ram per block group for keeping track ··· 8087 struct btrfs_path *path; 8088 struct btrfs_block_group_cache *block_group; 8089 struct btrfs_free_cluster *cluster; 8090 + struct btrfs_root *tree_root = root->fs_info->tree_root; 8091 struct btrfs_key key; 8092 + struct inode *inode; 8093 int ret; 8094 + int factor; 8095 8096 root = root->fs_info->extent_root; 8097 ··· 8097 BUG_ON(!block_group->ro); 8098 8099 memcpy(&key, &block_group->key, sizeof(key)); 8100 + if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 8101 + BTRFS_BLOCK_GROUP_RAID1 | 8102 + BTRFS_BLOCK_GROUP_RAID10)) 8103 + factor = 2; 8104 + else 8105 + factor = 1; 8106 8107 /* make sure this block group isn't part of an allocation cluster */ 8108 cluster = &root->fs_info->data_alloc_cluster; ··· 8115 8116 path = btrfs_alloc_path(); 8117 BUG_ON(!path); 8118 + 8119 + inode = lookup_free_space_inode(root, block_group, path); 8120 + if (!IS_ERR(inode)) { 8121 + btrfs_orphan_add(trans, inode); 8122 + clear_nlink(inode); 8123 + /* One for the block groups ref */ 8124 + spin_lock(&block_group->lock); 8125 + if (block_group->iref) { 8126 + block_group->iref = 0; 8127 + block_group->inode = NULL; 8128 + spin_unlock(&block_group->lock); 8129 + iput(inode); 8130 + } else { 8131 + spin_unlock(&block_group->lock); 8132 + } 8133 + /* One for our lookup ref */ 8134 + iput(inode); 8135 + } 8136 + 8137 + key.objectid = BTRFS_FREE_SPACE_OBJECTID; 8138 + key.offset = block_group->key.objectid; 8139 + key.type = 0; 8140 + 8141 + ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 8142 + if (ret < 0) 8143 + goto out; 8144 + if (ret > 0) 8145 + btrfs_release_path(tree_root, path); 8146 + if (ret == 0) { 8147 + ret = btrfs_del_item(trans, tree_root, path); 8148 + if (ret) 8149 + goto out; 8150 + btrfs_release_path(tree_root, path); 8151 + } 8152 8153 spin_lock(&root->fs_info->block_group_cache_lock); 8154 rb_erase(&block_group->cache_node, ··· 8137 spin_lock(&block_group->space_info->lock); 8138 block_group->space_info->total_bytes -= block_group->key.offset; 8139 block_group->space_info->bytes_readonly -= block_group->key.offset; 8140 + block_group->space_info->disk_total -= block_group->key.offset * factor; 8141 spin_unlock(&block_group->space_info->lock); 8142 + 8143 + memcpy(&key, &block_group->key, sizeof(key)); 8144 8145 btrfs_clear_space_info_full(root->fs_info); 8146
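The new should_alloc_chunk() in the extent-tree.c hunks above is what the "tune the chunk allocation to 5% of the FS as metadata" change boils down to: don't grow a space_info that still has slack, and on anything bigger than a tiny filesystem require real usage (not just reservations) before adding chunks. Below is a stand-alone userspace sketch of that decision, assuming div_factor() and div_factor_fine() behave as tenths and percent helpers and using made-up sizes in main(); it is a model for illustration, not the kernel function.

#include <stdio.h>
#include <stdint.h>

#define SZ_256M (256ULL * 1024 * 1024)

/* modelled after the kernel helpers: tenths and percent respectively */
static uint64_t div_factor(uint64_t num, int factor)
{
        return num * factor / 10;
}

static uint64_t div_factor_fine(uint64_t num, int factor)
{
        return num * factor / 100;
}

/* Model of the new should_alloc_chunk() decision; all sizes in bytes. */
static int should_alloc_chunk_model(uint64_t fs_total,
                                    uint64_t si_total, uint64_t si_readonly,
                                    uint64_t si_used, uint64_t si_reserved,
                                    uint64_t alloc_bytes)
{
        uint64_t num_bytes = si_total - si_readonly;
        uint64_t thresh;

        /* still at least 256MB of slack in this space_info: no new chunk */
        if (si_used + si_reserved + alloc_bytes + SZ_256M < num_bytes)
                return 0;

        /* less than 80% of the space_info committed: no new chunk */
        if (si_used + si_reserved + alloc_bytes < div_factor(num_bytes, 8))
                return 0;

        /*
         * The 5% rule: once this space_info is larger than
         * max(256MB, 5% of the whole fs), insist that at least 30% of it
         * is really used (not merely reserved) before growing it further.
         */
        thresh = div_factor_fine(fs_total, 5);
        if (thresh < SZ_256M)
                thresh = SZ_256M;
        if (num_bytes > thresh && si_used < div_factor(num_bytes, 3))
                return 0;

        return 1;
}

int main(void)
{
        uint64_t GB = 1024ULL * 1024 * 1024;

        /* 100GB fs, 6GB metadata pool of which only 1GB is used: refuse (prints 0) */
        printf("%d\n", should_alloc_chunk_model(100 * GB, 6 * GB, 0,
                                                1 * GB, 5 * GB, 4 * 1024 * 1024));
        /* same fs, but 3GB genuinely used: allow (prints 1) */
        printf("%d\n", should_alloc_chunk_model(100 * GB, 6 * GB, 0,
                                                3 * GB, 3 * GB, 4 * 1024 * 1024));
        return 0;
}

Run as-is this prints 0 then 1: the 6GB metadata pool on the hypothetical 100GB filesystem is refused more chunks until at least 30% of it is genuinely used.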
+85 -87
fs/btrfs/extent_io.c
··· 104 struct address_space *mapping, gfp_t mask) 105 { 106 tree->state = RB_ROOT; 107 - tree->buffer = RB_ROOT; 108 tree->ops = NULL; 109 tree->dirty_bytes = 0; 110 spin_lock_init(&tree->lock); ··· 233 if (!ret) 234 return prev; 235 return ret; 236 - } 237 - 238 - static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, 239 - u64 offset, struct rb_node *node) 240 - { 241 - struct rb_root *root = &tree->buffer; 242 - struct rb_node **p = &root->rb_node; 243 - struct rb_node *parent = NULL; 244 - struct extent_buffer *eb; 245 - 246 - while (*p) { 247 - parent = *p; 248 - eb = rb_entry(parent, struct extent_buffer, rb_node); 249 - 250 - if (offset < eb->start) 251 - p = &(*p)->rb_left; 252 - else if (offset > eb->start) 253 - p = &(*p)->rb_right; 254 - else 255 - return eb; 256 - } 257 - 258 - rb_link_node(node, parent, p); 259 - rb_insert_color(node, root); 260 - return NULL; 261 - } 262 - 263 - static struct extent_buffer *buffer_search(struct extent_io_tree *tree, 264 - u64 offset) 265 - { 266 - struct rb_root *root = &tree->buffer; 267 - struct rb_node *n = root->rb_node; 268 - struct extent_buffer *eb; 269 - 270 - while (n) { 271 - eb = rb_entry(n, struct extent_buffer, rb_node); 272 - if (offset < eb->start) 273 - n = n->rb_left; 274 - else if (offset > eb->start) 275 - n = n->rb_right; 276 - else 277 - return eb; 278 - } 279 - return NULL; 280 } 281 282 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, ··· 1857 struct page *page = bvec->bv_page; 1858 struct extent_io_tree *tree = bio->bi_private; 1859 u64 start; 1860 - u64 end; 1861 1862 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1863 - end = start + bvec->bv_len - 1; 1864 1865 bio->bi_private = NULL; 1866 ··· 2158 u64 last_byte = i_size_read(inode); 2159 u64 block_start; 2160 u64 iosize; 2161 - u64 unlock_start; 2162 sector_t sector; 2163 struct extent_state *cached_state = NULL; 2164 struct extent_map *em; ··· 2282 if (tree->ops && tree->ops->writepage_end_io_hook) 2283 tree->ops->writepage_end_io_hook(page, start, 2284 page_end, NULL, 1); 2285 - unlock_start = page_end + 1; 2286 goto done; 2287 } 2288 ··· 2292 if (tree->ops && tree->ops->writepage_end_io_hook) 2293 tree->ops->writepage_end_io_hook(page, cur, 2294 page_end, NULL, 1); 2295 - unlock_start = page_end + 1; 2296 break; 2297 } 2298 em = epd->get_extent(inode, page, pg_offset, cur, ··· 2338 2339 cur += iosize; 2340 pg_offset += iosize; 2341 - unlock_start = cur; 2342 continue; 2343 } 2344 /* leave this out until we have a page_mkwrite call */ ··· 2423 pgoff_t index; 2424 pgoff_t end; /* Inclusive */ 2425 int scanned = 0; 2426 - int range_whole = 0; 2427 2428 pagevec_init(&pvec, 0); 2429 if (wbc->range_cyclic) { ··· 2431 } else { 2432 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2433 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2434 - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2435 - range_whole = 1; 2436 scanned = 1; 2437 } 2438 retry: ··· 2770 NULL, 1, 2771 end_bio_extent_preparewrite, 0, 2772 0, 0); 2773 iocount++; 2774 block_start = block_start + iosize; 2775 } else { ··· 3053 kmem_cache_free(extent_buffer_cache, eb); 3054 } 3055 3056 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3057 u64 start, unsigned long len, 3058 struct page *page0, ··· 3099 struct page *p; 3100 struct address_space *mapping = tree->mapping; 3101 int uptodate = 1; 3102 3103 - spin_lock(&tree->buffer_lock); 3104 - eb = buffer_search(tree, start); 3105 - if (eb) { 3106 - atomic_inc(&eb->refs); 
3107 - spin_unlock(&tree->buffer_lock); 3108 mark_page_accessed(eb->first_page); 3109 return eb; 3110 } 3111 - spin_unlock(&tree->buffer_lock); 3112 3113 eb = __alloc_extent_buffer(tree, start, len, mask); 3114 if (!eb) ··· 3147 if (uptodate) 3148 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3149 3150 spin_lock(&tree->buffer_lock); 3151 - exists = buffer_tree_insert(tree, start, &eb->rb_node); 3152 - if (exists) { 3153 /* add one reference for the caller */ 3154 atomic_inc(&exists->refs); 3155 spin_unlock(&tree->buffer_lock); 3156 goto free_eb; 3157 } 3158 /* add one reference for the tree */ 3159 atomic_inc(&eb->refs); 3160 spin_unlock(&tree->buffer_lock); 3161 return eb; 3162 3163 free_eb: 3164 if (!atomic_dec_and_test(&eb->refs)) 3165 return exists; 3166 - for (index = 1; index < i; index++) 3167 - page_cache_release(extent_buffer_page(eb, index)); 3168 - page_cache_release(extent_buffer_page(eb, 0)); 3169 - __free_extent_buffer(eb); 3170 return exists; 3171 } 3172 ··· 3181 { 3182 struct extent_buffer *eb; 3183 3184 - spin_lock(&tree->buffer_lock); 3185 - eb = buffer_search(tree, start); 3186 - if (eb) 3187 - atomic_inc(&eb->refs); 3188 - spin_unlock(&tree->buffer_lock); 3189 - 3190 - if (eb) 3191 mark_page_accessed(eb->first_page); 3192 3193 - return eb; 3194 } 3195 3196 void free_extent_buffer(struct extent_buffer *eb) ··· 3820 } 3821 } 3822 3823 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3824 { 3825 u64 start = page_offset(page); 3826 struct extent_buffer *eb; 3827 int ret = 1; 3828 - unsigned long i; 3829 - unsigned long num_pages; 3830 3831 spin_lock(&tree->buffer_lock); 3832 - eb = buffer_search(tree, start); 3833 if (!eb) 3834 goto out; 3835 3836 - if (atomic_read(&eb->refs) > 1) { 3837 - ret = 0; 3838 - goto out; 3839 - } 3840 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3841 ret = 0; 3842 goto out; 3843 } 3844 - /* at this point we can safely release the extent buffer */ 3845 - num_pages = num_extent_pages(eb->start, eb->len); 3846 - for (i = 0; i < num_pages; i++) 3847 - page_cache_release(extent_buffer_page(eb, i)); 3848 - rb_erase(&eb->rb_node, &tree->buffer); 3849 - __free_extent_buffer(eb); 3850 out: 3851 spin_unlock(&tree->buffer_lock); 3852 return ret; 3853 }
··· 104 struct address_space *mapping, gfp_t mask) 105 { 106 tree->state = RB_ROOT; 107 + INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); 108 tree->ops = NULL; 109 tree->dirty_bytes = 0; 110 spin_lock_init(&tree->lock); ··· 233 if (!ret) 234 return prev; 235 return ret; 236 } 237 238 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, ··· 1901 struct page *page = bvec->bv_page; 1902 struct extent_io_tree *tree = bio->bi_private; 1903 u64 start; 1904 1905 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1906 1907 bio->bi_private = NULL; 1908 ··· 2204 u64 last_byte = i_size_read(inode); 2205 u64 block_start; 2206 u64 iosize; 2207 sector_t sector; 2208 struct extent_state *cached_state = NULL; 2209 struct extent_map *em; ··· 2329 if (tree->ops && tree->ops->writepage_end_io_hook) 2330 tree->ops->writepage_end_io_hook(page, start, 2331 page_end, NULL, 1); 2332 goto done; 2333 } 2334 ··· 2340 if (tree->ops && tree->ops->writepage_end_io_hook) 2341 tree->ops->writepage_end_io_hook(page, cur, 2342 page_end, NULL, 1); 2343 break; 2344 } 2345 em = epd->get_extent(inode, page, pg_offset, cur, ··· 2387 2388 cur += iosize; 2389 pg_offset += iosize; 2390 continue; 2391 } 2392 /* leave this out until we have a page_mkwrite call */ ··· 2473 pgoff_t index; 2474 pgoff_t end; /* Inclusive */ 2475 int scanned = 0; 2476 2477 pagevec_init(&pvec, 0); 2478 if (wbc->range_cyclic) { ··· 2482 } else { 2483 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2484 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2485 scanned = 1; 2486 } 2487 retry: ··· 2823 NULL, 1, 2824 end_bio_extent_preparewrite, 0, 2825 0, 0); 2826 + if (ret && !err) 2827 + err = ret; 2828 iocount++; 2829 block_start = block_start + iosize; 2830 } else { ··· 3104 kmem_cache_free(extent_buffer_cache, eb); 3105 } 3106 3107 + /* 3108 + * Helper for releasing extent buffer page. 3109 + */ 3110 + static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, 3111 + unsigned long start_idx) 3112 + { 3113 + unsigned long index; 3114 + struct page *page; 3115 + 3116 + if (!eb->first_page) 3117 + return; 3118 + 3119 + index = num_extent_pages(eb->start, eb->len); 3120 + if (start_idx >= index) 3121 + return; 3122 + 3123 + do { 3124 + index--; 3125 + page = extent_buffer_page(eb, index); 3126 + if (page) 3127 + page_cache_release(page); 3128 + } while (index != start_idx); 3129 + } 3130 + 3131 + /* 3132 + * Helper for releasing the extent buffer. 
3133 + */ 3134 + static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) 3135 + { 3136 + btrfs_release_extent_buffer_page(eb, 0); 3137 + __free_extent_buffer(eb); 3138 + } 3139 + 3140 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3141 u64 start, unsigned long len, 3142 struct page *page0, ··· 3117 struct page *p; 3118 struct address_space *mapping = tree->mapping; 3119 int uptodate = 1; 3120 + int ret; 3121 3122 + rcu_read_lock(); 3123 + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3124 + if (eb && atomic_inc_not_zero(&eb->refs)) { 3125 + rcu_read_unlock(); 3126 mark_page_accessed(eb->first_page); 3127 return eb; 3128 } 3129 + rcu_read_unlock(); 3130 3131 eb = __alloc_extent_buffer(tree, start, len, mask); 3132 if (!eb) ··· 3165 if (uptodate) 3166 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3167 3168 + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 3169 + if (ret) 3170 + goto free_eb; 3171 + 3172 spin_lock(&tree->buffer_lock); 3173 + ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); 3174 + if (ret == -EEXIST) { 3175 + exists = radix_tree_lookup(&tree->buffer, 3176 + start >> PAGE_CACHE_SHIFT); 3177 /* add one reference for the caller */ 3178 atomic_inc(&exists->refs); 3179 spin_unlock(&tree->buffer_lock); 3180 + radix_tree_preload_end(); 3181 goto free_eb; 3182 } 3183 /* add one reference for the tree */ 3184 atomic_inc(&eb->refs); 3185 spin_unlock(&tree->buffer_lock); 3186 + radix_tree_preload_end(); 3187 return eb; 3188 3189 free_eb: 3190 if (!atomic_dec_and_test(&eb->refs)) 3191 return exists; 3192 + btrfs_release_extent_buffer(eb); 3193 return exists; 3194 } 3195 ··· 3194 { 3195 struct extent_buffer *eb; 3196 3197 + rcu_read_lock(); 3198 + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3199 + if (eb && atomic_inc_not_zero(&eb->refs)) { 3200 + rcu_read_unlock(); 3201 mark_page_accessed(eb->first_page); 3202 + return eb; 3203 + } 3204 + rcu_read_unlock(); 3205 3206 + return NULL; 3207 } 3208 3209 void free_extent_buffer(struct extent_buffer *eb) ··· 3833 } 3834 } 3835 3836 + static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) 3837 + { 3838 + struct extent_buffer *eb = 3839 + container_of(head, struct extent_buffer, rcu_head); 3840 + 3841 + btrfs_release_extent_buffer(eb); 3842 + } 3843 + 3844 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3845 { 3846 u64 start = page_offset(page); 3847 struct extent_buffer *eb; 3848 int ret = 1; 3849 3850 spin_lock(&tree->buffer_lock); 3851 + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3852 if (!eb) 3853 goto out; 3854 3855 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3856 ret = 0; 3857 goto out; 3858 } 3859 + 3860 + /* 3861 + * set @eb->refs to 0 if it is already 1, and then release the @eb. 3862 + * Or go back. 3863 + */ 3864 + if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { 3865 + ret = 0; 3866 + goto out; 3867 + } 3868 + 3869 + radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3870 out: 3871 spin_unlock(&tree->buffer_lock); 3872 + 3873 + /* at this point we can safely release the extent buffer */ 3874 + if (atomic_read(&eb->refs) == 0) 3875 + call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 3876 return ret; 3877 }
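The extent_io.c changes above replace the extent buffer rbtree (searched under buffer_lock) with a radix tree whose lookups run under rcu_read_lock() and only take a reference via atomic_inc_not_zero(), while try_release_extent_buffer() claims the last reference with atomic_cmpxchg(&eb->refs, 1, 0) and defers the actual free to call_rcu(). The sketch below is a userspace C11-atomics model of just that refcount discipline, not the kernel primitives or the radix tree; the struct and helper names are invented for the example.

#include <stdatomic.h>
#include <stdio.h>

/* A reader may only take a reference if the object still holds one;
 * otherwise it must behave as if the lookup found nothing. */
struct buffer {
        atomic_int refs;
        unsigned long index;    /* start >> PAGE_SHIFT in the real tree */
};

/* Rough equivalent of atomic_inc_not_zero(): bump refs unless it is 0. */
static int get_ref_if_live(struct buffer *eb)
{
        int old = atomic_load(&eb->refs);

        while (old != 0) {
                if (atomic_compare_exchange_weak(&eb->refs, &old, old + 1))
                        return 1;       /* reference taken */
        }
        return 0;                       /* already dying, pretend it's gone */
}

/* Rough equivalent of the atomic_cmpxchg(&eb->refs, 1, 0) in try_release:
 * only the holder of the last reference may tear the buffer down. */
static int put_last_ref(struct buffer *eb)
{
        int expected = 1;

        return atomic_compare_exchange_strong(&eb->refs, &expected, 0);
}

int main(void)
{
        struct buffer eb;

        atomic_init(&eb.refs, 1);
        eb.index = 42;

        printf("lookup while live: %d\n", get_ref_if_live(&eb));  /* 1, refs now 2 */
        atomic_fetch_sub(&eb.refs, 1);                            /* caller drops it */
        printf("release last ref:  %d\n", put_last_ref(&eb));     /* 1, refs now 0 */
        printf("lookup after free: %d\n", get_ref_if_live(&eb));  /* 0 */
        return 0;
}

The cmpxchg-to-zero step is what lets a concurrent RCU lookup fail cleanly instead of resurrecting a buffer that is already queued for call_rcu().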
+2 -2
fs/btrfs/extent_io.h
··· 85 86 struct extent_io_tree { 87 struct rb_root state; 88 - struct rb_root buffer; 89 struct address_space *mapping; 90 u64 dirty_bytes; 91 spinlock_t lock; ··· 123 unsigned long bflags; 124 atomic_t refs; 125 struct list_head leak_list; 126 - struct rb_node rb_node; 127 128 /* the spinlock is used to protect most operations */ 129 spinlock_t lock;
··· 85 86 struct extent_io_tree { 87 struct rb_root state; 88 + struct radix_tree_root buffer; 89 struct address_space *mapping; 90 u64 dirty_bytes; 91 spinlock_t lock; ··· 123 unsigned long bflags; 124 atomic_t refs; 125 struct list_head leak_list; 126 + struct rcu_head rcu_head; 127 128 /* the spinlock is used to protect most operations */ 129 spinlock_t lock;
+2 -2
fs/btrfs/extent_map.c
··· 335 goto out; 336 } 337 if (IS_ERR(rb_node)) { 338 - em = ERR_PTR(PTR_ERR(rb_node)); 339 goto out; 340 } 341 em = rb_entry(rb_node, struct extent_map, rb_node); ··· 384 goto out; 385 } 386 if (IS_ERR(rb_node)) { 387 - em = ERR_PTR(PTR_ERR(rb_node)); 388 goto out; 389 } 390 em = rb_entry(rb_node, struct extent_map, rb_node);
··· 335 goto out; 336 } 337 if (IS_ERR(rb_node)) { 338 + em = ERR_CAST(rb_node); 339 goto out; 340 } 341 em = rb_entry(rb_node, struct extent_map, rb_node); ··· 384 goto out; 385 } 386 if (IS_ERR(rb_node)) { 387 + em = ERR_CAST(rb_node); 388 goto out; 389 } 390 em = rb_entry(rb_node, struct extent_map, rb_node);
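The extent_map.c hunks are a pure cleanup: ERR_CAST(p) carries an error pointer across pointer types and is equivalent to ERR_PTR(PTR_ERR(p)) without the round trip. The following is a minimal userspace re-implementation of the err.h helpers, written only to show that equivalence (the real kernel macros carry extra annotations):

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long PTR_ERR(const void *ptr)  { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
/* ERR_CAST: re-use an error pointer of one type as another pointer type. */
static inline void *ERR_CAST(const void *ptr) { return (void *)ptr; }

struct rb_node;     /* opaque, stands in for the tree lookup result */
struct extent_map;  /* opaque, stands in for the value the caller wants */

int main(void)
{
        struct rb_node *rb_node = ERR_PTR(-EEXIST); /* pretend the lookup failed */
        struct extent_map *em;

        em = ERR_CAST(rb_node);                     /* same bits, clearer intent */
        if (IS_ERR(em))
                printf("error %ld propagated through ERR_CAST\n", -PTR_ERR(em));
        return 0;
}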
+751
fs/btrfs/free-space-cache.c
··· 23 #include "ctree.h" 24 #include "free-space-cache.h" 25 #include "transaction.h" 26 27 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29 30 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 31 u64 offset)
··· 23 #include "ctree.h" 24 #include "free-space-cache.h" 25 #include "transaction.h" 26 + #include "disk-io.h" 27 28 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 29 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 30 + 31 + static void recalculate_thresholds(struct btrfs_block_group_cache 32 + *block_group); 33 + static int link_free_space(struct btrfs_block_group_cache *block_group, 34 + struct btrfs_free_space *info); 35 + 36 + struct inode *lookup_free_space_inode(struct btrfs_root *root, 37 + struct btrfs_block_group_cache 38 + *block_group, struct btrfs_path *path) 39 + { 40 + struct btrfs_key key; 41 + struct btrfs_key location; 42 + struct btrfs_disk_key disk_key; 43 + struct btrfs_free_space_header *header; 44 + struct extent_buffer *leaf; 45 + struct inode *inode = NULL; 46 + int ret; 47 + 48 + spin_lock(&block_group->lock); 49 + if (block_group->inode) 50 + inode = igrab(block_group->inode); 51 + spin_unlock(&block_group->lock); 52 + if (inode) 53 + return inode; 54 + 55 + key.objectid = BTRFS_FREE_SPACE_OBJECTID; 56 + key.offset = block_group->key.objectid; 57 + key.type = 0; 58 + 59 + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 60 + if (ret < 0) 61 + return ERR_PTR(ret); 62 + if (ret > 0) { 63 + btrfs_release_path(root, path); 64 + return ERR_PTR(-ENOENT); 65 + } 66 + 67 + leaf = path->nodes[0]; 68 + header = btrfs_item_ptr(leaf, path->slots[0], 69 + struct btrfs_free_space_header); 70 + btrfs_free_space_key(leaf, header, &disk_key); 71 + btrfs_disk_key_to_cpu(&location, &disk_key); 72 + btrfs_release_path(root, path); 73 + 74 + inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 75 + if (!inode) 76 + return ERR_PTR(-ENOENT); 77 + if (IS_ERR(inode)) 78 + return inode; 79 + if (is_bad_inode(inode)) { 80 + iput(inode); 81 + return ERR_PTR(-ENOENT); 82 + } 83 + 84 + spin_lock(&block_group->lock); 85 + if (!root->fs_info->closing) { 86 + block_group->inode = igrab(inode); 87 + block_group->iref = 1; 88 + } 89 + spin_unlock(&block_group->lock); 90 + 91 + return inode; 92 + } 93 + 94 + int create_free_space_inode(struct btrfs_root *root, 95 + struct btrfs_trans_handle *trans, 96 + struct btrfs_block_group_cache *block_group, 97 + struct btrfs_path *path) 98 + { 99 + struct btrfs_key key; 100 + struct btrfs_disk_key disk_key; 101 + struct btrfs_free_space_header *header; 102 + struct btrfs_inode_item *inode_item; 103 + struct extent_buffer *leaf; 104 + u64 objectid; 105 + int ret; 106 + 107 + ret = btrfs_find_free_objectid(trans, root, 0, &objectid); 108 + if (ret < 0) 109 + return ret; 110 + 111 + ret = btrfs_insert_empty_inode(trans, root, path, objectid); 112 + if (ret) 113 + return ret; 114 + 115 + leaf = path->nodes[0]; 116 + inode_item = btrfs_item_ptr(leaf, path->slots[0], 117 + struct btrfs_inode_item); 118 + btrfs_item_key(leaf, &disk_key, path->slots[0]); 119 + memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 120 + sizeof(*inode_item)); 121 + btrfs_set_inode_generation(leaf, inode_item, trans->transid); 122 + btrfs_set_inode_size(leaf, inode_item, 0); 123 + btrfs_set_inode_nbytes(leaf, inode_item, 0); 124 + btrfs_set_inode_uid(leaf, inode_item, 0); 125 + btrfs_set_inode_gid(leaf, inode_item, 0); 126 + btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 127 + btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | 128 + BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); 129 + btrfs_set_inode_nlink(leaf, inode_item, 1); 130 + btrfs_set_inode_transid(leaf, inode_item, trans->transid); 131 + btrfs_set_inode_block_group(leaf, inode_item, 132 + 
block_group->key.objectid); 133 + btrfs_mark_buffer_dirty(leaf); 134 + btrfs_release_path(root, path); 135 + 136 + key.objectid = BTRFS_FREE_SPACE_OBJECTID; 137 + key.offset = block_group->key.objectid; 138 + key.type = 0; 139 + 140 + ret = btrfs_insert_empty_item(trans, root, path, &key, 141 + sizeof(struct btrfs_free_space_header)); 142 + if (ret < 0) { 143 + btrfs_release_path(root, path); 144 + return ret; 145 + } 146 + leaf = path->nodes[0]; 147 + header = btrfs_item_ptr(leaf, path->slots[0], 148 + struct btrfs_free_space_header); 149 + memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 150 + btrfs_set_free_space_key(leaf, header, &disk_key); 151 + btrfs_mark_buffer_dirty(leaf); 152 + btrfs_release_path(root, path); 153 + 154 + return 0; 155 + } 156 + 157 + int btrfs_truncate_free_space_cache(struct btrfs_root *root, 158 + struct btrfs_trans_handle *trans, 159 + struct btrfs_path *path, 160 + struct inode *inode) 161 + { 162 + loff_t oldsize; 163 + int ret = 0; 164 + 165 + trans->block_rsv = root->orphan_block_rsv; 166 + ret = btrfs_block_rsv_check(trans, root, 167 + root->orphan_block_rsv, 168 + 0, 5); 169 + if (ret) 170 + return ret; 171 + 172 + oldsize = i_size_read(inode); 173 + btrfs_i_size_write(inode, 0); 174 + truncate_pagecache(inode, oldsize, 0); 175 + 176 + /* 177 + * We don't need an orphan item because truncating the free space cache 178 + * will never be split across transactions. 179 + */ 180 + ret = btrfs_truncate_inode_items(trans, root, inode, 181 + 0, BTRFS_EXTENT_DATA_KEY); 182 + if (ret) { 183 + WARN_ON(1); 184 + return ret; 185 + } 186 + 187 + return btrfs_update_inode(trans, root, inode); 188 + } 189 + 190 + static int readahead_cache(struct inode *inode) 191 + { 192 + struct file_ra_state *ra; 193 + unsigned long last_index; 194 + 195 + ra = kzalloc(sizeof(*ra), GFP_NOFS); 196 + if (!ra) 197 + return -ENOMEM; 198 + 199 + file_ra_state_init(ra, inode->i_mapping); 200 + last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 201 + 202 + page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 203 + 204 + kfree(ra); 205 + 206 + return 0; 207 + } 208 + 209 + int load_free_space_cache(struct btrfs_fs_info *fs_info, 210 + struct btrfs_block_group_cache *block_group) 211 + { 212 + struct btrfs_root *root = fs_info->tree_root; 213 + struct inode *inode; 214 + struct btrfs_free_space_header *header; 215 + struct extent_buffer *leaf; 216 + struct page *page; 217 + struct btrfs_path *path; 218 + u32 *checksums = NULL, *crc; 219 + char *disk_crcs = NULL; 220 + struct btrfs_key key; 221 + struct list_head bitmaps; 222 + u64 num_entries; 223 + u64 num_bitmaps; 224 + u64 generation; 225 + u32 cur_crc = ~(u32)0; 226 + pgoff_t index = 0; 227 + unsigned long first_page_offset; 228 + int num_checksums; 229 + int ret = 0; 230 + 231 + /* 232 + * If we're unmounting then just return, since this does a search on the 233 + * normal root and not the commit root and we could deadlock. 234 + */ 235 + smp_mb(); 236 + if (fs_info->closing) 237 + return 0; 238 + 239 + /* 240 + * If this block group has been marked to be cleared for one reason or 241 + * another then we can't trust the on disk cache, so just return. 
242 + */ 243 + spin_lock(&block_group->lock); 244 + if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 245 + spin_unlock(&block_group->lock); 246 + return 0; 247 + } 248 + spin_unlock(&block_group->lock); 249 + 250 + INIT_LIST_HEAD(&bitmaps); 251 + 252 + path = btrfs_alloc_path(); 253 + if (!path) 254 + return 0; 255 + 256 + inode = lookup_free_space_inode(root, block_group, path); 257 + if (IS_ERR(inode)) { 258 + btrfs_free_path(path); 259 + return 0; 260 + } 261 + 262 + /* Nothing in the space cache, goodbye */ 263 + if (!i_size_read(inode)) { 264 + btrfs_free_path(path); 265 + goto out; 266 + } 267 + 268 + key.objectid = BTRFS_FREE_SPACE_OBJECTID; 269 + key.offset = block_group->key.objectid; 270 + key.type = 0; 271 + 272 + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 273 + if (ret) { 274 + btrfs_free_path(path); 275 + goto out; 276 + } 277 + 278 + leaf = path->nodes[0]; 279 + header = btrfs_item_ptr(leaf, path->slots[0], 280 + struct btrfs_free_space_header); 281 + num_entries = btrfs_free_space_entries(leaf, header); 282 + num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 283 + generation = btrfs_free_space_generation(leaf, header); 284 + btrfs_free_path(path); 285 + 286 + if (BTRFS_I(inode)->generation != generation) { 287 + printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 288 + " not match free space cache generation (%llu) for " 289 + "block group %llu\n", 290 + (unsigned long long)BTRFS_I(inode)->generation, 291 + (unsigned long long)generation, 292 + (unsigned long long)block_group->key.objectid); 293 + goto out; 294 + } 295 + 296 + if (!num_entries) 297 + goto out; 298 + 299 + /* Setup everything for doing checksumming */ 300 + num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 301 + checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); 302 + if (!checksums) 303 + goto out; 304 + first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 305 + disk_crcs = kzalloc(first_page_offset, GFP_NOFS); 306 + if (!disk_crcs) 307 + goto out; 308 + 309 + ret = readahead_cache(inode); 310 + if (ret) { 311 + ret = 0; 312 + goto out; 313 + } 314 + 315 + while (1) { 316 + struct btrfs_free_space_entry *entry; 317 + struct btrfs_free_space *e; 318 + void *addr; 319 + unsigned long offset = 0; 320 + unsigned long start_offset = 0; 321 + int need_loop = 0; 322 + 323 + if (!num_entries && !num_bitmaps) 324 + break; 325 + 326 + if (index == 0) { 327 + start_offset = first_page_offset; 328 + offset = start_offset; 329 + } 330 + 331 + page = grab_cache_page(inode->i_mapping, index); 332 + if (!page) { 333 + ret = 0; 334 + goto free_cache; 335 + } 336 + 337 + if (!PageUptodate(page)) { 338 + btrfs_readpage(NULL, page); 339 + lock_page(page); 340 + if (!PageUptodate(page)) { 341 + unlock_page(page); 342 + page_cache_release(page); 343 + printk(KERN_ERR "btrfs: error reading free " 344 + "space cache: %llu\n", 345 + (unsigned long long) 346 + block_group->key.objectid); 347 + goto free_cache; 348 + } 349 + } 350 + addr = kmap(page); 351 + 352 + if (index == 0) { 353 + u64 *gen; 354 + 355 + memcpy(disk_crcs, addr, first_page_offset); 356 + gen = addr + (sizeof(u32) * num_checksums); 357 + if (*gen != BTRFS_I(inode)->generation) { 358 + printk(KERN_ERR "btrfs: space cache generation" 359 + " (%llu) does not match inode (%llu) " 360 + "for block group %llu\n", 361 + (unsigned long long)*gen, 362 + (unsigned long long) 363 + BTRFS_I(inode)->generation, 364 + (unsigned long long) 365 + block_group->key.objectid); 366 + kunmap(page); 367 + 
unlock_page(page); 368 + page_cache_release(page); 369 + goto free_cache; 370 + } 371 + crc = (u32 *)disk_crcs; 372 + } 373 + entry = addr + start_offset; 374 + 375 + /* First lets check our crc before we do anything fun */ 376 + cur_crc = ~(u32)0; 377 + cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, 378 + PAGE_CACHE_SIZE - start_offset); 379 + btrfs_csum_final(cur_crc, (char *)&cur_crc); 380 + if (cur_crc != *crc) { 381 + printk(KERN_ERR "btrfs: crc mismatch for page %lu in " 382 + "block group %llu\n", index, 383 + (unsigned long long)block_group->key.objectid); 384 + kunmap(page); 385 + unlock_page(page); 386 + page_cache_release(page); 387 + goto free_cache; 388 + } 389 + crc++; 390 + 391 + while (1) { 392 + if (!num_entries) 393 + break; 394 + 395 + need_loop = 1; 396 + e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); 397 + if (!e) { 398 + kunmap(page); 399 + unlock_page(page); 400 + page_cache_release(page); 401 + goto free_cache; 402 + } 403 + 404 + e->offset = le64_to_cpu(entry->offset); 405 + e->bytes = le64_to_cpu(entry->bytes); 406 + if (!e->bytes) { 407 + kunmap(page); 408 + kfree(e); 409 + unlock_page(page); 410 + page_cache_release(page); 411 + goto free_cache; 412 + } 413 + 414 + if (entry->type == BTRFS_FREE_SPACE_EXTENT) { 415 + spin_lock(&block_group->tree_lock); 416 + ret = link_free_space(block_group, e); 417 + spin_unlock(&block_group->tree_lock); 418 + BUG_ON(ret); 419 + } else { 420 + e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 421 + if (!e->bitmap) { 422 + kunmap(page); 423 + kfree(e); 424 + unlock_page(page); 425 + page_cache_release(page); 426 + goto free_cache; 427 + } 428 + spin_lock(&block_group->tree_lock); 429 + ret = link_free_space(block_group, e); 430 + block_group->total_bitmaps++; 431 + recalculate_thresholds(block_group); 432 + spin_unlock(&block_group->tree_lock); 433 + list_add_tail(&e->list, &bitmaps); 434 + } 435 + 436 + num_entries--; 437 + offset += sizeof(struct btrfs_free_space_entry); 438 + if (offset + sizeof(struct btrfs_free_space_entry) >= 439 + PAGE_CACHE_SIZE) 440 + break; 441 + entry++; 442 + } 443 + 444 + /* 445 + * We read an entry out of this page, we need to move on to the 446 + * next page. 447 + */ 448 + if (need_loop) { 449 + kunmap(page); 450 + goto next; 451 + } 452 + 453 + /* 454 + * We add the bitmaps at the end of the entries in order that 455 + * the bitmap entries are added to the cache. 
456 + */ 457 + e = list_entry(bitmaps.next, struct btrfs_free_space, list); 458 + list_del_init(&e->list); 459 + memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); 460 + kunmap(page); 461 + num_bitmaps--; 462 + next: 463 + unlock_page(page); 464 + page_cache_release(page); 465 + index++; 466 + } 467 + 468 + ret = 1; 469 + out: 470 + kfree(checksums); 471 + kfree(disk_crcs); 472 + iput(inode); 473 + return ret; 474 + 475 + free_cache: 476 + /* This cache is bogus, make sure it gets cleared */ 477 + spin_lock(&block_group->lock); 478 + block_group->disk_cache_state = BTRFS_DC_CLEAR; 479 + spin_unlock(&block_group->lock); 480 + btrfs_remove_free_space_cache(block_group); 481 + goto out; 482 + } 483 + 484 + int btrfs_write_out_cache(struct btrfs_root *root, 485 + struct btrfs_trans_handle *trans, 486 + struct btrfs_block_group_cache *block_group, 487 + struct btrfs_path *path) 488 + { 489 + struct btrfs_free_space_header *header; 490 + struct extent_buffer *leaf; 491 + struct inode *inode; 492 + struct rb_node *node; 493 + struct list_head *pos, *n; 494 + struct page *page; 495 + struct extent_state *cached_state = NULL; 496 + struct list_head bitmap_list; 497 + struct btrfs_key key; 498 + u64 bytes = 0; 499 + u32 *crc, *checksums; 500 + pgoff_t index = 0, last_index = 0; 501 + unsigned long first_page_offset; 502 + int num_checksums; 503 + int entries = 0; 504 + int bitmaps = 0; 505 + int ret = 0; 506 + 507 + root = root->fs_info->tree_root; 508 + 509 + INIT_LIST_HEAD(&bitmap_list); 510 + 511 + spin_lock(&block_group->lock); 512 + if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 513 + spin_unlock(&block_group->lock); 514 + return 0; 515 + } 516 + spin_unlock(&block_group->lock); 517 + 518 + inode = lookup_free_space_inode(root, block_group, path); 519 + if (IS_ERR(inode)) 520 + return 0; 521 + 522 + if (!i_size_read(inode)) { 523 + iput(inode); 524 + return 0; 525 + } 526 + 527 + last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 528 + filemap_write_and_wait(inode->i_mapping); 529 + btrfs_wait_ordered_range(inode, inode->i_size & 530 + ~(root->sectorsize - 1), (u64)-1); 531 + 532 + /* We need a checksum per page. */ 533 + num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 534 + crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); 535 + if (!crc) { 536 + iput(inode); 537 + return 0; 538 + } 539 + 540 + /* Since the first page has all of our checksums and our generation we 541 + * need to calculate the offset into the page that we can start writing 542 + * our entries. 543 + */ 544 + first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 545 + 546 + node = rb_first(&block_group->free_space_offset); 547 + if (!node) 548 + goto out_free; 549 + 550 + /* 551 + * Lock all pages first so we can lock the extent safely. 552 + * 553 + * NOTE: Because we hold the ref the entire time we're going to write to 554 + * the page find_get_page should never fail, so we don't do a check 555 + * after find_get_page at this point. Just putting this here so people 556 + * know and don't freak out. 
557 + */ 558 + while (index <= last_index) { 559 + page = grab_cache_page(inode->i_mapping, index); 560 + if (!page) { 561 + pgoff_t i = 0; 562 + 563 + while (i < index) { 564 + page = find_get_page(inode->i_mapping, i); 565 + unlock_page(page); 566 + page_cache_release(page); 567 + page_cache_release(page); 568 + i++; 569 + } 570 + goto out_free; 571 + } 572 + index++; 573 + } 574 + 575 + index = 0; 576 + lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 577 + 0, &cached_state, GFP_NOFS); 578 + 579 + /* Write out the extent entries */ 580 + do { 581 + struct btrfs_free_space_entry *entry; 582 + void *addr; 583 + unsigned long offset = 0; 584 + unsigned long start_offset = 0; 585 + 586 + if (index == 0) { 587 + start_offset = first_page_offset; 588 + offset = start_offset; 589 + } 590 + 591 + page = find_get_page(inode->i_mapping, index); 592 + 593 + addr = kmap(page); 594 + entry = addr + start_offset; 595 + 596 + memset(addr, 0, PAGE_CACHE_SIZE); 597 + while (1) { 598 + struct btrfs_free_space *e; 599 + 600 + e = rb_entry(node, struct btrfs_free_space, offset_index); 601 + entries++; 602 + 603 + entry->offset = cpu_to_le64(e->offset); 604 + entry->bytes = cpu_to_le64(e->bytes); 605 + if (e->bitmap) { 606 + entry->type = BTRFS_FREE_SPACE_BITMAP; 607 + list_add_tail(&e->list, &bitmap_list); 608 + bitmaps++; 609 + } else { 610 + entry->type = BTRFS_FREE_SPACE_EXTENT; 611 + } 612 + node = rb_next(node); 613 + if (!node) 614 + break; 615 + offset += sizeof(struct btrfs_free_space_entry); 616 + if (offset + sizeof(struct btrfs_free_space_entry) >= 617 + PAGE_CACHE_SIZE) 618 + break; 619 + entry++; 620 + } 621 + *crc = ~(u32)0; 622 + *crc = btrfs_csum_data(root, addr + start_offset, *crc, 623 + PAGE_CACHE_SIZE - start_offset); 624 + kunmap(page); 625 + 626 + btrfs_csum_final(*crc, (char *)crc); 627 + crc++; 628 + 629 + bytes += PAGE_CACHE_SIZE; 630 + 631 + ClearPageChecked(page); 632 + set_page_extent_mapped(page); 633 + SetPageUptodate(page); 634 + set_page_dirty(page); 635 + 636 + /* 637 + * We need to release our reference we got for grab_cache_page, 638 + * except for the first page which will hold our checksums, we 639 + * do that below. 
640 + */ 641 + if (index != 0) { 642 + unlock_page(page); 643 + page_cache_release(page); 644 + } 645 + 646 + page_cache_release(page); 647 + 648 + index++; 649 + } while (node); 650 + 651 + /* Write out the bitmaps */ 652 + list_for_each_safe(pos, n, &bitmap_list) { 653 + void *addr; 654 + struct btrfs_free_space *entry = 655 + list_entry(pos, struct btrfs_free_space, list); 656 + 657 + page = find_get_page(inode->i_mapping, index); 658 + 659 + addr = kmap(page); 660 + memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 661 + *crc = ~(u32)0; 662 + *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); 663 + kunmap(page); 664 + btrfs_csum_final(*crc, (char *)crc); 665 + crc++; 666 + bytes += PAGE_CACHE_SIZE; 667 + 668 + ClearPageChecked(page); 669 + set_page_extent_mapped(page); 670 + SetPageUptodate(page); 671 + set_page_dirty(page); 672 + unlock_page(page); 673 + page_cache_release(page); 674 + page_cache_release(page); 675 + list_del_init(&entry->list); 676 + index++; 677 + } 678 + 679 + /* Zero out the rest of the pages just to make sure */ 680 + while (index <= last_index) { 681 + void *addr; 682 + 683 + page = find_get_page(inode->i_mapping, index); 684 + 685 + addr = kmap(page); 686 + memset(addr, 0, PAGE_CACHE_SIZE); 687 + kunmap(page); 688 + ClearPageChecked(page); 689 + set_page_extent_mapped(page); 690 + SetPageUptodate(page); 691 + set_page_dirty(page); 692 + unlock_page(page); 693 + page_cache_release(page); 694 + page_cache_release(page); 695 + bytes += PAGE_CACHE_SIZE; 696 + index++; 697 + } 698 + 699 + btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); 700 + 701 + /* Write the checksums and trans id to the first page */ 702 + { 703 + void *addr; 704 + u64 *gen; 705 + 706 + page = find_get_page(inode->i_mapping, 0); 707 + 708 + addr = kmap(page); 709 + memcpy(addr, checksums, sizeof(u32) * num_checksums); 710 + gen = addr + (sizeof(u32) * num_checksums); 711 + *gen = trans->transid; 712 + kunmap(page); 713 + ClearPageChecked(page); 714 + set_page_extent_mapped(page); 715 + SetPageUptodate(page); 716 + set_page_dirty(page); 717 + unlock_page(page); 718 + page_cache_release(page); 719 + page_cache_release(page); 720 + } 721 + BTRFS_I(inode)->generation = trans->transid; 722 + 723 + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 724 + i_size_read(inode) - 1, &cached_state, GFP_NOFS); 725 + 726 + filemap_write_and_wait(inode->i_mapping); 727 + 728 + key.objectid = BTRFS_FREE_SPACE_OBJECTID; 729 + key.offset = block_group->key.objectid; 730 + key.type = 0; 731 + 732 + ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 733 + if (ret < 0) { 734 + ret = 0; 735 + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 736 + EXTENT_DIRTY | EXTENT_DELALLOC | 737 + EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 738 + goto out_free; 739 + } 740 + leaf = path->nodes[0]; 741 + if (ret > 0) { 742 + struct btrfs_key found_key; 743 + BUG_ON(!path->slots[0]); 744 + path->slots[0]--; 745 + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 746 + if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 747 + found_key.offset != block_group->key.objectid) { 748 + ret = 0; 749 + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 750 + EXTENT_DIRTY | EXTENT_DELALLOC | 751 + EXTENT_DO_ACCOUNTING, 0, 0, NULL, 752 + GFP_NOFS); 753 + btrfs_release_path(root, path); 754 + goto out_free; 755 + } 756 + } 757 + header = btrfs_item_ptr(leaf, path->slots[0], 758 + struct btrfs_free_space_header); 759 + btrfs_set_free_space_entries(leaf, header, entries); 760 + 
btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 761 + btrfs_set_free_space_generation(leaf, header, trans->transid); 762 + btrfs_mark_buffer_dirty(leaf); 763 + btrfs_release_path(root, path); 764 + 765 + ret = 1; 766 + 767 + out_free: 768 + if (ret == 0) { 769 + invalidate_inode_pages2_range(inode->i_mapping, 0, index); 770 + spin_lock(&block_group->lock); 771 + block_group->disk_cache_state = BTRFS_DC_ERROR; 772 + spin_unlock(&block_group->lock); 773 + BTRFS_I(inode)->generation = 0; 774 + } 775 + kfree(checksums); 776 + btrfs_update_inode(trans, root, inode); 777 + iput(inode); 778 + return ret; 779 + } 780 781 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 782 u64 offset)
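load_free_space_cache() and btrfs_write_out_cache() above agree on a simple file layout: page 0 begins with one u32 crc per page of the cache file followed by a u64 generation, free-space entries start only after that (and at offset 0 on every later page), and each bitmap occupies a whole page of its own. Below is a small sketch of that arithmetic; the 4K page size and the 64K example file are assumptions for illustration.

#include <stdio.h>
#include <stdint.h>

#define CACHE_PAGE_SIZE 4096ULL

static void describe_cache(uint64_t isize)
{
        /* mirrors: num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE */
        uint64_t num_checksums = isize / CACHE_PAGE_SIZE;
        /* mirrors: first_page_offset = sizeof(u32) * num_checksums + sizeof(u64) */
        uint64_t first_page_offset = sizeof(uint32_t) * num_checksums
                                     + sizeof(uint64_t);

        printf("cache file: %llu bytes, %llu pages\n",
               (unsigned long long)isize,
               (unsigned long long)num_checksums);
        printf("  crcs:       %llu x 4 bytes at the start of page 0\n",
               (unsigned long long)num_checksums);
        printf("  generation: 8 bytes at offset %llu of page 0\n",
               (unsigned long long)(sizeof(uint32_t) * num_checksums));
        printf("  entries:    from offset %llu on page 0, offset 0 on later pages\n",
               (unsigned long long)first_page_offset);
}

int main(void)
{
        describe_cache(64 * 1024);      /* a 16-page cache file */
        return 0;
}

For the 64K example this puts the first entry at byte 72 of page 0 (sixteen 4-byte crcs plus the 8-byte generation), which is exactly the offset both the loader and the writer compute as first_page_offset.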
+18
fs/btrfs/free-space-cache.h
··· 27 struct list_head list; 28 }; 29 30 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 31 u64 bytenr, u64 size); 32 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
··· 27 struct list_head list; 28 }; 29 30 + struct inode *lookup_free_space_inode(struct btrfs_root *root, 31 + struct btrfs_block_group_cache 32 + *block_group, struct btrfs_path *path); 33 + int create_free_space_inode(struct btrfs_root *root, 34 + struct btrfs_trans_handle *trans, 35 + struct btrfs_block_group_cache *block_group, 36 + struct btrfs_path *path); 37 + 38 + int btrfs_truncate_free_space_cache(struct btrfs_root *root, 39 + struct btrfs_trans_handle *trans, 40 + struct btrfs_path *path, 41 + struct inode *inode); 42 + int load_free_space_cache(struct btrfs_fs_info *fs_info, 43 + struct btrfs_block_group_cache *block_group); 44 + int btrfs_write_out_cache(struct btrfs_root *root, 45 + struct btrfs_trans_handle *trans, 46 + struct btrfs_block_group_cache *block_group, 47 + struct btrfs_path *path); 48 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 49 u64 bytenr, u64 size); 50 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+153 -49
fs/btrfs/inode.c
··· 319 struct btrfs_root *root = BTRFS_I(inode)->root; 320 struct btrfs_trans_handle *trans; 321 u64 num_bytes; 322 - u64 orig_start; 323 - u64 disk_num_bytes; 324 u64 blocksize = root->sectorsize; 325 u64 actual_end; 326 u64 isize = i_size_read(inode); ··· 332 unsigned long max_uncompressed = 128 * 1024; 333 int i; 334 int will_compress; 335 - 336 - orig_start = start; 337 338 actual_end = min_t(u64, isize, end + 1); 339 again: ··· 367 total_compressed = min(total_compressed, max_uncompressed); 368 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 369 num_bytes = max(blocksize, num_bytes); 370 - disk_num_bytes = num_bytes; 371 total_in = 0; 372 ret = 0; 373 ··· 462 if (total_compressed >= total_in) { 463 will_compress = 0; 464 } else { 465 - disk_num_bytes = total_compressed; 466 num_bytes = total_in; 467 } 468 } ··· 751 u64 disk_num_bytes; 752 u64 cur_alloc_size; 753 u64 blocksize = root->sectorsize; 754 - u64 actual_end; 755 - u64 isize = i_size_read(inode); 756 struct btrfs_key ins; 757 struct extent_map *em; 758 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 759 int ret = 0; 760 761 trans = btrfs_join_transaction(root, 1); 762 BUG_ON(!trans); 763 btrfs_set_trans_block_group(trans, inode); 764 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 765 - 766 - actual_end = min_t(u64, isize, end + 1); 767 768 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 769 num_bytes = max(blocksize, num_bytes); ··· 1026 int type; 1027 int nocow; 1028 int check_prev = 1; 1029 1030 path = btrfs_alloc_path(); 1031 BUG_ON(!path); 1032 - trans = btrfs_join_transaction(root, 1); 1033 BUG_ON(!trans); 1034 1035 cow_start = (u64)-1; ··· 1208 BUG_ON(ret); 1209 } 1210 1211 - ret = btrfs_end_transaction(trans, root); 1212 - BUG_ON(ret); 1213 btrfs_free_path(path); 1214 return 0; 1215 } ··· 1291 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1292 struct btrfs_root *root = BTRFS_I(inode)->root; 1293 u64 len = state->end + 1 - state->start; 1294 1295 if (*bits & EXTENT_FIRST_DELALLOC) 1296 *bits &= ~EXTENT_FIRST_DELALLOC; ··· 1302 spin_lock(&root->fs_info->delalloc_lock); 1303 BTRFS_I(inode)->delalloc_bytes += len; 1304 root->fs_info->delalloc_bytes += len; 1305 - if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1306 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1307 &root->fs_info->delalloc_inodes); 1308 } ··· 1325 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1326 struct btrfs_root *root = BTRFS_I(inode)->root; 1327 u64 len = state->end + 1 - state->start; 1328 1329 if (*bits & EXTENT_FIRST_DELALLOC) 1330 *bits &= ~EXTENT_FIRST_DELALLOC; ··· 1336 if (*bits & EXTENT_DO_ACCOUNTING) 1337 btrfs_delalloc_release_metadata(inode, len); 1338 1339 - if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1340 btrfs_free_reserved_data_space(inode, len); 1341 1342 spin_lock(&root->fs_info->delalloc_lock); 1343 root->fs_info->delalloc_bytes -= len; 1344 BTRFS_I(inode)->delalloc_bytes -= len; 1345 1346 - if (BTRFS_I(inode)->delalloc_bytes == 0 && 1347 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1348 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1349 } ··· 1379 1380 if (map_length < length + size) 1381 return 1; 1382 - return 0; 1383 } 1384 1385 /* ··· 1433 1434 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1435 1436 - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1437 BUG_ON(ret); 1438 1439 if (!(rw & REQ_WRITE)) { ··· 1672 struct extent_state *cached_state = NULL; 1673 int compressed = 0; 1674 int ret; 1675 
1676 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1677 end - start + 1); ··· 1680 return 0; 1681 BUG_ON(!ordered_extent); 1682 1683 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1684 BUG_ON(!list_empty(&ordered_extent->list)); 1685 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1686 if (!ret) { 1687 - trans = btrfs_join_transaction(root, 1); 1688 btrfs_set_trans_block_group(trans, inode); 1689 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1690 ret = btrfs_update_inode(trans, root, inode); ··· 1703 ordered_extent->file_offset + ordered_extent->len - 1, 1704 0, &cached_state, GFP_NOFS); 1705 1706 - trans = btrfs_join_transaction(root, 1); 1707 btrfs_set_trans_block_group(trans, inode); 1708 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1709 ··· 1720 ordered_extent->len); 1721 BUG_ON(ret); 1722 } else { 1723 ret = insert_reserved_file_extent(trans, inode, 1724 ordered_extent->file_offset, 1725 ordered_extent->start, ··· 1745 ret = btrfs_update_inode(trans, root, inode); 1746 BUG_ON(ret); 1747 out: 1748 - btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1749 - if (trans) 1750 - btrfs_end_transaction(trans, root); 1751 /* once for us */ 1752 btrfs_put_ordered_extent(ordered_extent); 1753 /* once for the tree */ ··· 2264 { 2265 struct btrfs_path *path; 2266 struct extent_buffer *leaf; 2267 - struct btrfs_item *item; 2268 struct btrfs_key key, found_key; 2269 struct btrfs_trans_handle *trans; 2270 struct inode *inode; ··· 2301 2302 /* pull out the item */ 2303 leaf = path->nodes[0]; 2304 - item = btrfs_item_nr(leaf, path->slots[0]); 2305 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2306 2307 /* make sure the item matches what we want */ ··· 2676 2677 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2678 dir, index); 2679 - BUG_ON(ret); 2680 err: 2681 btrfs_free_path(path); 2682 if (ret) ··· 2698 { 2699 struct extent_buffer *eb; 2700 int level; 2701 - int ret; 2702 u64 refs = 1; 2703 2704 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2705 if (!path->nodes[level]) ··· 2712 if (refs > 1) 2713 return 1; 2714 } 2715 - return 0; 2716 } 2717 2718 /* ··· 3222 3223 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3224 3225 - if (root->ref_cows) 3226 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3227 3228 path = btrfs_alloc_path(); ··· 3370 } else { 3371 break; 3372 } 3373 - if (found_extent && root->ref_cows) { 3374 btrfs_set_path_blocking(path); 3375 ret = btrfs_free_extent(trans, root, extent_start, 3376 extent_num_bytes, 0, ··· 3702 int ret; 3703 3704 truncate_inode_pages(&inode->i_data, 0); 3705 - if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) 3706 goto no_delete; 3707 3708 if (is_bad_inode(inode)) { ··· 3916 } 3917 spin_unlock(&root->inode_lock); 3918 3919 - if (empty && btrfs_root_refs(&root->root_item) == 0) { 3920 synchronize_srcu(&root->fs_info->subvol_srcu); 3921 spin_lock(&root->inode_lock); 3922 empty = RB_EMPTY_ROOT(&root->inode_tree); ··· 4317 struct btrfs_root *root = BTRFS_I(inode)->root; 4318 struct btrfs_trans_handle *trans; 4319 int ret = 0; 4320 4321 if (BTRFS_I(inode)->dummy_inode) 4322 return 0; 4323 4324 if (wbc->sync_mode == WB_SYNC_ALL) { 4325 - trans = btrfs_join_transaction(root, 1); 4326 btrfs_set_trans_block_group(trans, inode); 4327 - ret = btrfs_commit_transaction(trans, root); 4328 } 4329 return ret; 4330 } ··· 5690 struct btrfs_root *root = BTRFS_I(inode)->root; 5691 struct btrfs_dio_private *dip; 5692 struct bio_vec 
*bvec = bio->bi_io_vec; 5693 - u64 start; 5694 int skip_sum; 5695 int write = rw & REQ_WRITE; 5696 int ret = 0; ··· 5715 dip->inode = inode; 5716 dip->logical_offset = file_offset; 5717 5718 - start = dip->logical_offset; 5719 dip->bytes = 0; 5720 do { 5721 dip->bytes += bvec->bv_len; ··· 6351 spin_unlock(&root->fs_info->ordered_extent_lock); 6352 } 6353 6354 spin_lock(&root->orphan_lock); 6355 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6356 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", ··· 6398 { 6399 struct btrfs_root *root = BTRFS_I(inode)->root; 6400 6401 - if (btrfs_root_refs(&root->root_item) == 0) 6402 return 1; 6403 else 6404 return generic_drop_inode(inode); ··· 6668 return 0; 6669 } 6670 6671 - int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) 6672 { 6673 struct btrfs_inode *binode; 6674 struct inode *inode = NULL; ··· 6691 spin_unlock(&root->fs_info->delalloc_lock); 6692 6693 if (inode) { 6694 - write_inode_now(inode, 0); 6695 if (delay_iput) 6696 btrfs_add_delayed_iput(inode); 6697 else ··· 6836 return err; 6837 } 6838 6839 - int btrfs_prealloc_file_range(struct inode *inode, int mode, 6840 - u64 start, u64 num_bytes, u64 min_size, 6841 - loff_t actual_len, u64 *alloc_hint) 6842 { 6843 - struct btrfs_trans_handle *trans; 6844 struct btrfs_root *root = BTRFS_I(inode)->root; 6845 struct btrfs_key ins; 6846 u64 cur_offset = start; 6847 int ret = 0; 6848 6849 while (num_bytes > 0) { 6850 - trans = btrfs_start_transaction(root, 3); 6851 - if (IS_ERR(trans)) { 6852 - ret = PTR_ERR(trans); 6853 - break; 6854 } 6855 6856 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 6857 0, *alloc_hint, (u64)-1, &ins, 1); 6858 if (ret) { 6859 - btrfs_end_transaction(trans, root); 6860 break; 6861 } 6862 ··· 6895 ret = btrfs_update_inode(trans, root, inode); 6896 BUG_ON(ret); 6897 6898 - btrfs_end_transaction(trans, root); 6899 } 6900 return ret; 6901 } 6902 6903 static long btrfs_fallocate(struct inode *inode, int mode,
··· 319 struct btrfs_root *root = BTRFS_I(inode)->root; 320 struct btrfs_trans_handle *trans; 321 u64 num_bytes; 322 u64 blocksize = root->sectorsize; 323 u64 actual_end; 324 u64 isize = i_size_read(inode); ··· 334 unsigned long max_uncompressed = 128 * 1024; 335 int i; 336 int will_compress; 337 338 actual_end = min_t(u64, isize, end + 1); 339 again: ··· 371 total_compressed = min(total_compressed, max_uncompressed); 372 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 373 num_bytes = max(blocksize, num_bytes); 374 total_in = 0; 375 ret = 0; 376 ··· 467 if (total_compressed >= total_in) { 468 will_compress = 0; 469 } else { 470 num_bytes = total_in; 471 } 472 } ··· 757 u64 disk_num_bytes; 758 u64 cur_alloc_size; 759 u64 blocksize = root->sectorsize; 760 struct btrfs_key ins; 761 struct extent_map *em; 762 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 763 int ret = 0; 764 765 + BUG_ON(root == root->fs_info->tree_root); 766 trans = btrfs_join_transaction(root, 1); 767 BUG_ON(!trans); 768 btrfs_set_trans_block_group(trans, inode); 769 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 770 771 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 772 num_bytes = max(blocksize, num_bytes); ··· 1035 int type; 1036 int nocow; 1037 int check_prev = 1; 1038 + bool nolock = false; 1039 1040 path = btrfs_alloc_path(); 1041 BUG_ON(!path); 1042 + if (root == root->fs_info->tree_root) { 1043 + nolock = true; 1044 + trans = btrfs_join_transaction_nolock(root, 1); 1045 + } else { 1046 + trans = btrfs_join_transaction(root, 1); 1047 + } 1048 BUG_ON(!trans); 1049 1050 cow_start = (u64)-1; ··· 1211 BUG_ON(ret); 1212 } 1213 1214 + if (nolock) { 1215 + ret = btrfs_end_transaction_nolock(trans, root); 1216 + BUG_ON(ret); 1217 + } else { 1218 + ret = btrfs_end_transaction(trans, root); 1219 + BUG_ON(ret); 1220 + } 1221 btrfs_free_path(path); 1222 return 0; 1223 } ··· 1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1290 struct btrfs_root *root = BTRFS_I(inode)->root; 1291 u64 len = state->end + 1 - state->start; 1292 + int do_list = (root->root_key.objectid != 1293 + BTRFS_ROOT_TREE_OBJECTID); 1294 1295 if (*bits & EXTENT_FIRST_DELALLOC) 1296 *bits &= ~EXTENT_FIRST_DELALLOC; ··· 1298 spin_lock(&root->fs_info->delalloc_lock); 1299 BTRFS_I(inode)->delalloc_bytes += len; 1300 root->fs_info->delalloc_bytes += len; 1301 + if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1303 &root->fs_info->delalloc_inodes); 1304 } ··· 1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1322 struct btrfs_root *root = BTRFS_I(inode)->root; 1323 u64 len = state->end + 1 - state->start; 1324 + int do_list = (root->root_key.objectid != 1325 + BTRFS_ROOT_TREE_OBJECTID); 1326 1327 if (*bits & EXTENT_FIRST_DELALLOC) 1328 *bits &= ~EXTENT_FIRST_DELALLOC; ··· 1330 if (*bits & EXTENT_DO_ACCOUNTING) 1331 btrfs_delalloc_release_metadata(inode, len); 1332 1333 + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1334 + && do_list) 1335 btrfs_free_reserved_data_space(inode, len); 1336 1337 spin_lock(&root->fs_info->delalloc_lock); 1338 root->fs_info->delalloc_bytes -= len; 1339 BTRFS_I(inode)->delalloc_bytes -= len; 1340 1341 + if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && 1342 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1343 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1344 } ··· 1372 1373 if (map_length < length + size) 1374 return 1; 1375 + return ret; 1376 } 1377 1378 
/* ··· 1426 1427 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1428 1429 + if (root == root->fs_info->tree_root) 1430 + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1431 + else 1432 + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1433 BUG_ON(ret); 1434 1435 if (!(rw & REQ_WRITE)) { ··· 1662 struct extent_state *cached_state = NULL; 1663 int compressed = 0; 1664 int ret; 1665 + bool nolock = false; 1666 1667 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1668 end - start + 1); ··· 1669 return 0; 1670 BUG_ON(!ordered_extent); 1671 1672 + nolock = (root == root->fs_info->tree_root); 1673 + 1674 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1675 BUG_ON(!list_empty(&ordered_extent->list)); 1676 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1677 if (!ret) { 1678 + if (nolock) 1679 + trans = btrfs_join_transaction_nolock(root, 1); 1680 + else 1681 + trans = btrfs_join_transaction(root, 1); 1682 + BUG_ON(!trans); 1683 btrfs_set_trans_block_group(trans, inode); 1684 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1685 ret = btrfs_update_inode(trans, root, inode); ··· 1686 ordered_extent->file_offset + ordered_extent->len - 1, 1687 0, &cached_state, GFP_NOFS); 1688 1689 + if (nolock) 1690 + trans = btrfs_join_transaction_nolock(root, 1); 1691 + else 1692 + trans = btrfs_join_transaction(root, 1); 1693 btrfs_set_trans_block_group(trans, inode); 1694 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1695 ··· 1700 ordered_extent->len); 1701 BUG_ON(ret); 1702 } else { 1703 + BUG_ON(root == root->fs_info->tree_root); 1704 ret = insert_reserved_file_extent(trans, inode, 1705 ordered_extent->file_offset, 1706 ordered_extent->start, ··· 1724 ret = btrfs_update_inode(trans, root, inode); 1725 BUG_ON(ret); 1726 out: 1727 + if (nolock) { 1728 + if (trans) 1729 + btrfs_end_transaction_nolock(trans, root); 1730 + } else { 1731 + btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1732 + if (trans) 1733 + btrfs_end_transaction(trans, root); 1734 + } 1735 + 1736 /* once for us */ 1737 btrfs_put_ordered_extent(ordered_extent); 1738 /* once for the tree */ ··· 2237 { 2238 struct btrfs_path *path; 2239 struct extent_buffer *leaf; 2240 struct btrfs_key key, found_key; 2241 struct btrfs_trans_handle *trans; 2242 struct inode *inode; ··· 2275 2276 /* pull out the item */ 2277 leaf = path->nodes[0]; 2278 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2279 2280 /* make sure the item matches what we want */ ··· 2651 2652 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2653 dir, index); 2654 + if (ret == -ENOENT) 2655 + ret = 0; 2656 err: 2657 btrfs_free_path(path); 2658 if (ret) ··· 2672 { 2673 struct extent_buffer *eb; 2674 int level; 2675 u64 refs = 1; 2676 + int uninitialized_var(ret); 2677 2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2679 if (!path->nodes[level]) ··· 2686 if (refs > 1) 2687 return 1; 2688 } 2689 + return ret; /* XXX callers? 
*/ 2690 } 2691 2692 /* ··· 3196 3197 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3198 3199 + if (root->ref_cows || root == root->fs_info->tree_root) 3200 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3201 3202 path = btrfs_alloc_path(); ··· 3344 } else { 3345 break; 3346 } 3347 + if (found_extent && (root->ref_cows || 3348 + root == root->fs_info->tree_root)) { 3349 btrfs_set_path_blocking(path); 3350 ret = btrfs_free_extent(trans, root, extent_start, 3351 extent_num_bytes, 0, ··· 3675 int ret; 3676 3677 truncate_inode_pages(&inode->i_data, 0); 3678 + if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3679 + root == root->fs_info->tree_root)) 3680 goto no_delete; 3681 3682 if (is_bad_inode(inode)) { ··· 3888 } 3889 spin_unlock(&root->inode_lock); 3890 3891 + /* 3892 + * Free space cache has inodes in the tree root, but the tree root has a 3893 + * root_refs of 0, so this could end up dropping the tree root as a 3894 + * snapshot, so we need the extra !root->fs_info->tree_root check to 3895 + * make sure we don't drop it. 3896 + */ 3897 + if (empty && btrfs_root_refs(&root->root_item) == 0 && 3898 + root != root->fs_info->tree_root) { 3899 synchronize_srcu(&root->fs_info->subvol_srcu); 3900 spin_lock(&root->inode_lock); 3901 empty = RB_EMPTY_ROOT(&root->inode_tree); ··· 4282 struct btrfs_root *root = BTRFS_I(inode)->root; 4283 struct btrfs_trans_handle *trans; 4284 int ret = 0; 4285 + bool nolock = false; 4286 4287 if (BTRFS_I(inode)->dummy_inode) 4288 return 0; 4289 4290 + smp_mb(); 4291 + nolock = (root->fs_info->closing && root == root->fs_info->tree_root); 4292 + 4293 if (wbc->sync_mode == WB_SYNC_ALL) { 4294 + if (nolock) 4295 + trans = btrfs_join_transaction_nolock(root, 1); 4296 + else 4297 + trans = btrfs_join_transaction(root, 1); 4298 btrfs_set_trans_block_group(trans, inode); 4299 + if (nolock) 4300 + ret = btrfs_end_transaction_nolock(trans, root); 4301 + else 4302 + ret = btrfs_commit_transaction(trans, root); 4303 } 4304 return ret; 4305 } ··· 5645 struct btrfs_root *root = BTRFS_I(inode)->root; 5646 struct btrfs_dio_private *dip; 5647 struct bio_vec *bvec = bio->bi_io_vec; 5648 int skip_sum; 5649 int write = rw & REQ_WRITE; 5650 int ret = 0; ··· 5671 dip->inode = inode; 5672 dip->logical_offset = file_offset; 5673 5674 dip->bytes = 0; 5675 do { 5676 dip->bytes += bvec->bv_len; ··· 6308 spin_unlock(&root->fs_info->ordered_extent_lock); 6309 } 6310 6311 + if (root == root->fs_info->tree_root) { 6312 + struct btrfs_block_group_cache *block_group; 6313 + 6314 + block_group = btrfs_lookup_block_group(root->fs_info, 6315 + BTRFS_I(inode)->block_group); 6316 + if (block_group && block_group->inode == inode) { 6317 + spin_lock(&block_group->lock); 6318 + block_group->inode = NULL; 6319 + spin_unlock(&block_group->lock); 6320 + btrfs_put_block_group(block_group); 6321 + } else if (block_group) { 6322 + btrfs_put_block_group(block_group); 6323 + } 6324 + } 6325 + 6326 spin_lock(&root->orphan_lock); 6327 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6328 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", ··· 6340 { 6341 struct btrfs_root *root = BTRFS_I(inode)->root; 6342 6343 + if (btrfs_root_refs(&root->root_item) == 0 && 6344 + root != root->fs_info->tree_root) 6345 return 1; 6346 else 6347 return generic_drop_inode(inode); ··· 6609 return 0; 6610 } 6611 6612 + int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput, 6613 + int sync) 6614 { 6615 struct btrfs_inode *binode; 6616 struct inode *inode = NULL; 
··· 6631 spin_unlock(&root->fs_info->delalloc_lock); 6632 6633 if (inode) { 6634 + if (sync) { 6635 + filemap_write_and_wait(inode->i_mapping); 6636 + /* 6637 + * We have to do this because compression doesn't 6638 + * actually set PG_writeback until it submits the pages 6639 + * for IO, which happens in an async thread, so we could 6640 + * race and not actually wait for any writeback pages 6641 + * because they've not been submitted yet. Technically 6642 + * this could still be the case for the ordered stuff 6643 + * since the async thread may not have started to do its 6644 + * work yet. If this becomes the case then we need to 6645 + * figure out a way to make sure that in writepage we 6646 + * wait for any async pages to be submitted before 6647 + * returning so that fdatawait does what its supposed to 6648 + * do. 6649 + */ 6650 + btrfs_wait_ordered_range(inode, 0, (u64)-1); 6651 + } else { 6652 + filemap_flush(inode->i_mapping); 6653 + } 6654 if (delay_iput) 6655 btrfs_add_delayed_iput(inode); 6656 else ··· 6757 return err; 6758 } 6759 6760 + static int __btrfs_prealloc_file_range(struct inode *inode, int mode, 6761 + u64 start, u64 num_bytes, u64 min_size, 6762 + loff_t actual_len, u64 *alloc_hint, 6763 + struct btrfs_trans_handle *trans) 6764 { 6765 struct btrfs_root *root = BTRFS_I(inode)->root; 6766 struct btrfs_key ins; 6767 u64 cur_offset = start; 6768 int ret = 0; 6769 + bool own_trans = true; 6770 6771 + if (trans) 6772 + own_trans = false; 6773 while (num_bytes > 0) { 6774 + if (own_trans) { 6775 + trans = btrfs_start_transaction(root, 3); 6776 + if (IS_ERR(trans)) { 6777 + ret = PTR_ERR(trans); 6778 + break; 6779 + } 6780 } 6781 6782 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 6783 0, *alloc_hint, (u64)-1, &ins, 1); 6784 if (ret) { 6785 + if (own_trans) 6786 + btrfs_end_transaction(trans, root); 6787 break; 6788 } 6789 ··· 6810 ret = btrfs_update_inode(trans, root, inode); 6811 BUG_ON(ret); 6812 6813 + if (own_trans) 6814 + btrfs_end_transaction(trans, root); 6815 } 6816 return ret; 6817 + } 6818 + 6819 + int btrfs_prealloc_file_range(struct inode *inode, int mode, 6820 + u64 start, u64 num_bytes, u64 min_size, 6821 + loff_t actual_len, u64 *alloc_hint) 6822 + { 6823 + return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, 6824 + min_size, actual_len, alloc_hint, 6825 + NULL); 6826 + } 6827 + 6828 + int btrfs_prealloc_file_range_trans(struct inode *inode, 6829 + struct btrfs_trans_handle *trans, int mode, 6830 + u64 start, u64 num_bytes, u64 min_size, 6831 + loff_t actual_len, u64 *alloc_hint) 6832 + { 6833 + return __btrfs_prealloc_file_range(inode, mode, start, num_bytes, 6834 + min_size, actual_len, alloc_hint, trans); 6835 } 6836 6837 static long btrfs_fallocate(struct inode *inode, int mode,
+319 -77
fs/btrfs/ioctl.c
··· 224 225 static noinline int create_subvol(struct btrfs_root *root, 226 struct dentry *dentry, 227 - char *name, int namelen) 228 { 229 struct btrfs_trans_handle *trans; 230 struct btrfs_key key; ··· 339 340 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 341 fail: 342 - err = btrfs_commit_transaction(trans, root); 343 if (err && !ret) 344 ret = err; 345 return ret; 346 } 347 348 - static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) 349 { 350 struct inode *inode; 351 struct btrfs_pending_snapshot *pending_snapshot; ··· 380 381 list_add(&pending_snapshot->list, 382 &trans->transaction->pending_snapshots); 383 - ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); 384 BUG_ON(ret); 385 386 ret = pending_snapshot->error; ··· 409 return ret; 410 } 411 412 /* copy of may_create in fs/namei.c() */ 413 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 414 { ··· 496 */ 497 static noinline int btrfs_mksubvol(struct path *parent, 498 char *name, int namelen, 499 - struct btrfs_root *snap_src) 500 { 501 struct inode *dir = parent->dentry->d_inode; 502 struct dentry *dentry; ··· 528 goto out_up_read; 529 530 if (snap_src) { 531 - error = create_snapshot(snap_src, dentry); 532 } else { 533 error = create_subvol(BTRFS_I(dir)->root, dentry, 534 - name, namelen); 535 } 536 if (!error) 537 fsnotify_mkdir(dir, dentry); ··· 794 char *sizestr; 795 char *devstr = NULL; 796 int ret = 0; 797 - int namelen; 798 int mod = 0; 799 800 if (root->fs_info->sb->s_flags & MS_RDONLY) ··· 807 return PTR_ERR(vol_args); 808 809 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 810 - namelen = strlen(vol_args->name); 811 812 mutex_lock(&root->fs_info->volume_mutex); 813 sizestr = vol_args->name; ··· 885 return ret; 886 } 887 888 - static noinline int btrfs_ioctl_snap_create(struct file *file, 889 - void __user *arg, int subvol) 890 { 891 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 892 - struct btrfs_ioctl_vol_args *vol_args; 893 struct file *src_file; 894 int namelen; 895 int ret = 0; ··· 899 if (root->fs_info->sb->s_flags & MS_RDONLY) 900 return -EROFS; 901 902 - vol_args = memdup_user(arg, sizeof(*vol_args)); 903 - if (IS_ERR(vol_args)) 904 - return PTR_ERR(vol_args); 905 - 906 - vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 907 - namelen = strlen(vol_args->name); 908 - if (strchr(vol_args->name, '/')) { 909 ret = -EINVAL; 910 goto out; 911 } 912 913 if (subvol) { 914 - ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 915 - NULL); 916 } else { 917 struct inode *src_inode; 918 - src_file = fget(vol_args->fd); 919 if (!src_file) { 920 ret = -EINVAL; 921 goto out; ··· 924 fput(src_file); 925 goto out; 926 } 927 - ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 928 - BTRFS_I(src_inode)->root); 929 fput(src_file); 930 } 931 out: 932 kfree(vol_args); 933 return ret; 934 } 935 ··· 1198 if (!capable(CAP_SYS_ADMIN)) 1199 return -EPERM; 1200 1201 - args = kmalloc(sizeof(*args), GFP_KERNEL); 1202 - if (!args) 1203 - return -ENOMEM; 1204 1205 - if (copy_from_user(args, argp, sizeof(*args))) { 1206 - kfree(args); 1207 - return -EFAULT; 1208 - } 1209 inode = fdentry(file)->d_inode; 1210 ret = search_ioctl(inode, args); 1211 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ··· 1309 if (!capable(CAP_SYS_ADMIN)) 1310 return -EPERM; 1311 1312 - args = kmalloc(sizeof(*args), GFP_KERNEL); 1313 - if (!args) 1314 - return -ENOMEM; 1315 1316 - if (copy_from_user(args, argp, sizeof(*args))) { 1317 - kfree(args); 1318 - return 
-EFAULT; 1319 - } 1320 inode = fdentry(file)->d_inode; 1321 1322 if (args->treeid == 0) ··· 1344 int ret; 1345 int err = 0; 1346 1347 - if (!capable(CAP_SYS_ADMIN)) 1348 - return -EPERM; 1349 - 1350 vol_args = memdup_user(arg, sizeof(*vol_args)); 1351 if (IS_ERR(vol_args)) 1352 return PTR_ERR(vol_args); ··· 1373 } 1374 1375 inode = dentry->d_inode; 1376 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1377 err = -EINVAL; 1378 goto out_dput; 1379 } 1380 - 1381 - dest = BTRFS_I(inode)->root; 1382 1383 mutex_lock(&inode->i_mutex); 1384 err = d_invalidate(dentry); ··· 1456 BUG_ON(ret); 1457 } 1458 1459 - ret = btrfs_commit_transaction(trans, root); 1460 BUG_ON(ret); 1461 inode->i_flags |= S_DEAD; 1462 out_up_write: ··· 1654 path->reada = 2; 1655 1656 if (inode < src) { 1657 - mutex_lock(&inode->i_mutex); 1658 - mutex_lock(&src->i_mutex); 1659 } else { 1660 - mutex_lock(&src->i_mutex); 1661 - mutex_lock(&inode->i_mutex); 1662 } 1663 1664 /* determine range to clone */ ··· 1682 while (1) { 1683 struct btrfs_ordered_extent *ordered; 1684 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1685 - ordered = btrfs_lookup_first_ordered_extent(inode, off+len); 1686 - if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) 1687 break; 1688 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1689 if (ordered) 1690 btrfs_put_ordered_extent(ordered); 1691 - btrfs_wait_ordered_range(src, off, off+len); 1692 } 1693 1694 /* clone data */ ··· 1759 } 1760 btrfs_release_path(root, path); 1761 1762 - if (key.offset + datal < off || 1763 key.offset >= off+len) 1764 goto next; 1765 ··· 2033 return 0; 2034 } 2035 2036 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 2037 { 2038 struct btrfs_ioctl_space_args space_args; ··· 2057 struct btrfs_ioctl_space_info *dest_orig; 2058 struct btrfs_ioctl_space_info *user_dest; 2059 struct btrfs_space_info *info; 2060 int alloc_size; 2061 int ret = 0; 2062 int slot_count = 0; 2063 2064 if (copy_from_user(&space_args, 2065 (struct btrfs_ioctl_space_args __user *)arg, 2066 sizeof(space_args))) 2067 return -EFAULT; 2068 2069 - /* first we count slots */ 2070 - rcu_read_lock(); 2071 - list_for_each_entry_rcu(info, &root->fs_info->space_info, list) 2072 - slot_count++; 2073 - rcu_read_unlock(); 2074 2075 /* space_slots == 0 means they are asking for a count */ 2076 if (space_args.space_slots == 0) { 2077 space_args.total_spaces = slot_count; 2078 goto out; 2079 } 2080 alloc_size = sizeof(*dest) * slot_count; 2081 /* we generally have at most 6 or so space infos, one for each raid 2082 * level. 
So, a whole page should be more than enough for everyone 2083 */ ··· 2120 dest_orig = dest; 2121 2122 /* now we have a buffer to copy into */ 2123 - rcu_read_lock(); 2124 - list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { 2125 - /* make sure we don't copy more than we allocated 2126 - * in our buffer 2127 - */ 2128 - if (slot_count == 0) 2129 - break; 2130 - slot_count--; 2131 2132 - /* make sure userland has enough room in their buffer */ 2133 - if (space_args.total_spaces >= space_args.space_slots) 2134 - break; 2135 2136 - space.flags = info->flags; 2137 - space.total_bytes = info->total_bytes; 2138 - space.used_bytes = info->bytes_used; 2139 - memcpy(dest, &space, sizeof(space)); 2140 - dest++; 2141 - space_args.total_spaces++; 2142 } 2143 - rcu_read_unlock(); 2144 2145 user_dest = (struct btrfs_ioctl_space_info *) 2146 (arg + sizeof(struct btrfs_ioctl_space_args)); ··· 2190 return 0; 2191 } 2192 2193 long btrfs_ioctl(struct file *file, unsigned int 2194 cmd, unsigned long arg) 2195 { ··· 2234 case FS_IOC_GETVERSION: 2235 return btrfs_ioctl_getversion(file, argp); 2236 case BTRFS_IOC_SNAP_CREATE: 2237 - return btrfs_ioctl_snap_create(file, argp, 0); 2238 case BTRFS_IOC_SUBVOL_CREATE: 2239 - return btrfs_ioctl_snap_create(file, argp, 1); 2240 case BTRFS_IOC_SNAP_DESTROY: 2241 return btrfs_ioctl_snap_destroy(file, argp); 2242 case BTRFS_IOC_DEFAULT_SUBVOL: ··· 2272 case BTRFS_IOC_SYNC: 2273 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2274 return 0; 2275 } 2276 2277 return -ENOTTY;
··· 224 225 static noinline int create_subvol(struct btrfs_root *root, 226 struct dentry *dentry, 227 + char *name, int namelen, 228 + u64 *async_transid) 229 { 230 struct btrfs_trans_handle *trans; 231 struct btrfs_key key; ··· 338 339 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 340 fail: 341 + if (async_transid) { 342 + *async_transid = trans->transid; 343 + err = btrfs_commit_transaction_async(trans, root, 1); 344 + } else { 345 + err = btrfs_commit_transaction(trans, root); 346 + } 347 if (err && !ret) 348 ret = err; 349 return ret; 350 } 351 352 + static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 353 + char *name, int namelen, u64 *async_transid) 354 { 355 struct inode *inode; 356 struct btrfs_pending_snapshot *pending_snapshot; ··· 373 374 list_add(&pending_snapshot->list, 375 &trans->transaction->pending_snapshots); 376 + if (async_transid) { 377 + *async_transid = trans->transid; 378 + ret = btrfs_commit_transaction_async(trans, 379 + root->fs_info->extent_root, 1); 380 + } else { 381 + ret = btrfs_commit_transaction(trans, 382 + root->fs_info->extent_root); 383 + } 384 BUG_ON(ret); 385 386 ret = pending_snapshot->error; ··· 395 return ret; 396 } 397 398 + /* copy of check_sticky in fs/namei.c() 399 + * It's inline, so penalty for filesystems that don't use sticky bit is 400 + * minimal. 401 + */ 402 + static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 403 + { 404 + uid_t fsuid = current_fsuid(); 405 + 406 + if (!(dir->i_mode & S_ISVTX)) 407 + return 0; 408 + if (inode->i_uid == fsuid) 409 + return 0; 410 + if (dir->i_uid == fsuid) 411 + return 0; 412 + return !capable(CAP_FOWNER); 413 + } 414 + 415 + /* copy of may_delete in fs/namei.c() 416 + * Check whether we can remove a link victim from directory dir, check 417 + * whether the type of victim is right. 418 + * 1. We can't do it if dir is read-only (done in permission()) 419 + * 2. We should have write and exec permissions on dir 420 + * 3. We can't remove anything from append-only dir 421 + * 4. We can't do anything with immutable dir (done in permission()) 422 + * 5. If the sticky bit on dir is set we should either 423 + * a. be owner of dir, or 424 + * b. be owner of victim, or 425 + * c. have CAP_FOWNER capability 426 + * 6. If the victim is append-only or immutable we can't do antyhing with 427 + * links pointing to it. 428 + * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 429 + * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 430 + * 9. We can't remove a root or mountpoint. 431 + * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 432 + * nfs_async_unlink(). 
433 + */ 434 + 435 + static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) 436 + { 437 + int error; 438 + 439 + if (!victim->d_inode) 440 + return -ENOENT; 441 + 442 + BUG_ON(victim->d_parent->d_inode != dir); 443 + audit_inode_child(victim, dir); 444 + 445 + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 446 + if (error) 447 + return error; 448 + if (IS_APPEND(dir)) 449 + return -EPERM; 450 + if (btrfs_check_sticky(dir, victim->d_inode)|| 451 + IS_APPEND(victim->d_inode)|| 452 + IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 453 + return -EPERM; 454 + if (isdir) { 455 + if (!S_ISDIR(victim->d_inode->i_mode)) 456 + return -ENOTDIR; 457 + if (IS_ROOT(victim)) 458 + return -EBUSY; 459 + } else if (S_ISDIR(victim->d_inode->i_mode)) 460 + return -EISDIR; 461 + if (IS_DEADDIR(dir)) 462 + return -ENOENT; 463 + if (victim->d_flags & DCACHE_NFSFS_RENAMED) 464 + return -EBUSY; 465 + return 0; 466 + } 467 + 468 /* copy of may_create in fs/namei.c() */ 469 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 470 { ··· 412 */ 413 static noinline int btrfs_mksubvol(struct path *parent, 414 char *name, int namelen, 415 + struct btrfs_root *snap_src, 416 + u64 *async_transid) 417 { 418 struct inode *dir = parent->dentry->d_inode; 419 struct dentry *dentry; ··· 443 goto out_up_read; 444 445 if (snap_src) { 446 + error = create_snapshot(snap_src, dentry, 447 + name, namelen, async_transid); 448 } else { 449 error = create_subvol(BTRFS_I(dir)->root, dentry, 450 + name, namelen, async_transid); 451 } 452 if (!error) 453 fsnotify_mkdir(dir, dentry); ··· 708 char *sizestr; 709 char *devstr = NULL; 710 int ret = 0; 711 int mod = 0; 712 713 if (root->fs_info->sb->s_flags & MS_RDONLY) ··· 722 return PTR_ERR(vol_args); 723 724 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 725 726 mutex_lock(&root->fs_info->volume_mutex); 727 sizestr = vol_args->name; ··· 801 return ret; 802 } 803 804 + static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 805 + char *name, 806 + unsigned long fd, 807 + int subvol, 808 + u64 *transid) 809 { 810 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 811 struct file *src_file; 812 int namelen; 813 int ret = 0; ··· 813 if (root->fs_info->sb->s_flags & MS_RDONLY) 814 return -EROFS; 815 816 + namelen = strlen(name); 817 + if (strchr(name, '/')) { 818 ret = -EINVAL; 819 goto out; 820 } 821 822 if (subvol) { 823 + ret = btrfs_mksubvol(&file->f_path, name, namelen, 824 + NULL, transid); 825 } else { 826 struct inode *src_inode; 827 + src_file = fget(fd); 828 if (!src_file) { 829 ret = -EINVAL; 830 goto out; ··· 843 fput(src_file); 844 goto out; 845 } 846 + ret = btrfs_mksubvol(&file->f_path, name, namelen, 847 + BTRFS_I(src_inode)->root, 848 + transid); 849 fput(src_file); 850 } 851 out: 852 + return ret; 853 + } 854 + 855 + static noinline int btrfs_ioctl_snap_create(struct file *file, 856 + void __user *arg, int subvol, 857 + int async) 858 + { 859 + struct btrfs_ioctl_vol_args *vol_args = NULL; 860 + struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; 861 + char *name; 862 + u64 fd; 863 + u64 transid = 0; 864 + int ret; 865 + 866 + if (async) { 867 + async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); 868 + if (IS_ERR(async_vol_args)) 869 + return PTR_ERR(async_vol_args); 870 + 871 + name = async_vol_args->name; 872 + fd = async_vol_args->fd; 873 + async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0'; 874 + } else { 875 + vol_args = memdup_user(arg, sizeof(*vol_args)); 876 + if 
(IS_ERR(vol_args)) 877 + return PTR_ERR(vol_args); 878 + name = vol_args->name; 879 + fd = vol_args->fd; 880 + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 881 + } 882 + 883 + ret = btrfs_ioctl_snap_create_transid(file, name, fd, 884 + subvol, &transid); 885 + 886 + if (!ret && async) { 887 + if (copy_to_user(arg + 888 + offsetof(struct btrfs_ioctl_async_vol_args, 889 + transid), &transid, sizeof(transid))) 890 + return -EFAULT; 891 + } 892 + 893 kfree(vol_args); 894 + kfree(async_vol_args); 895 + 896 return ret; 897 } 898 ··· 1073 if (!capable(CAP_SYS_ADMIN)) 1074 return -EPERM; 1075 1076 + args = memdup_user(argp, sizeof(*args)); 1077 + if (IS_ERR(args)) 1078 + return PTR_ERR(args); 1079 1080 inode = fdentry(file)->d_inode; 1081 ret = search_ioctl(inode, args); 1082 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ··· 1188 if (!capable(CAP_SYS_ADMIN)) 1189 return -EPERM; 1190 1191 + args = memdup_user(argp, sizeof(*args)); 1192 + if (IS_ERR(args)) 1193 + return PTR_ERR(args); 1194 1195 inode = fdentry(file)->d_inode; 1196 1197 if (args->treeid == 0) ··· 1227 int ret; 1228 int err = 0; 1229 1230 vol_args = memdup_user(arg, sizeof(*vol_args)); 1231 if (IS_ERR(vol_args)) 1232 return PTR_ERR(vol_args); ··· 1259 } 1260 1261 inode = dentry->d_inode; 1262 + dest = BTRFS_I(inode)->root; 1263 + if (!capable(CAP_SYS_ADMIN)){ 1264 + /* 1265 + * Regular user. Only allow this with a special mount 1266 + * option, when the user has write+exec access to the 1267 + * subvol root, and when rmdir(2) would have been 1268 + * allowed. 1269 + * 1270 + * Note that this is _not_ check that the subvol is 1271 + * empty or doesn't contain data that we wouldn't 1272 + * otherwise be able to delete. 1273 + * 1274 + * Users who want to delete empty subvols should try 1275 + * rmdir(2). 1276 + */ 1277 + err = -EPERM; 1278 + if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 1279 + goto out_dput; 1280 + 1281 + /* 1282 + * Do not allow deletion if the parent dir is the same 1283 + * as the dir to be deleted. That means the ioctl 1284 + * must be called on the dentry referencing the root 1285 + * of the subvol, not a random directory contained 1286 + * within it. 
1287 + */ 1288 + err = -EINVAL; 1289 + if (root == dest) 1290 + goto out_dput; 1291 + 1292 + err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 1293 + if (err) 1294 + goto out_dput; 1295 + 1296 + /* check if subvolume may be deleted by a non-root user */ 1297 + err = btrfs_may_delete(dir, dentry, 1); 1298 + if (err) 1299 + goto out_dput; 1300 + } 1301 + 1302 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1303 err = -EINVAL; 1304 goto out_dput; 1305 } 1306 1307 mutex_lock(&inode->i_mutex); 1308 err = d_invalidate(dentry); ··· 1304 BUG_ON(ret); 1305 } 1306 1307 + ret = btrfs_end_transaction(trans, root); 1308 BUG_ON(ret); 1309 inode->i_flags |= S_DEAD; 1310 out_up_write: ··· 1502 path->reada = 2; 1503 1504 if (inode < src) { 1505 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 1506 + mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 1507 } else { 1508 + mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 1509 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 1510 } 1511 1512 /* determine range to clone */ ··· 1530 while (1) { 1531 struct btrfs_ordered_extent *ordered; 1532 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1533 + ordered = btrfs_lookup_first_ordered_extent(src, off+len); 1534 + if (!ordered && 1535 + !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, 1536 + EXTENT_DELALLOC, 0, NULL)) 1537 break; 1538 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1539 if (ordered) 1540 btrfs_put_ordered_extent(ordered); 1541 + btrfs_wait_ordered_range(src, off, len); 1542 } 1543 1544 /* clone data */ ··· 1605 } 1606 btrfs_release_path(root, path); 1607 1608 + if (key.offset + datal <= off || 1609 key.offset >= off+len) 1610 goto next; 1611 ··· 1879 return 0; 1880 } 1881 1882 + static void get_block_group_info(struct list_head *groups_list, 1883 + struct btrfs_ioctl_space_info *space) 1884 + { 1885 + struct btrfs_block_group_cache *block_group; 1886 + 1887 + space->total_bytes = 0; 1888 + space->used_bytes = 0; 1889 + space->flags = 0; 1890 + list_for_each_entry(block_group, groups_list, list) { 1891 + space->flags = block_group->flags; 1892 + space->total_bytes += block_group->key.offset; 1893 + space->used_bytes += 1894 + btrfs_block_group_used(&block_group->item); 1895 + } 1896 + } 1897 + 1898 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 1899 { 1900 struct btrfs_ioctl_space_args space_args; ··· 1887 struct btrfs_ioctl_space_info *dest_orig; 1888 struct btrfs_ioctl_space_info *user_dest; 1889 struct btrfs_space_info *info; 1890 + u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 1891 + BTRFS_BLOCK_GROUP_SYSTEM, 1892 + BTRFS_BLOCK_GROUP_METADATA, 1893 + BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; 1894 + int num_types = 4; 1895 int alloc_size; 1896 int ret = 0; 1897 int slot_count = 0; 1898 + int i, c; 1899 1900 if (copy_from_user(&space_args, 1901 (struct btrfs_ioctl_space_args __user *)arg, 1902 sizeof(space_args))) 1903 return -EFAULT; 1904 1905 + for (i = 0; i < num_types; i++) { 1906 + struct btrfs_space_info *tmp; 1907 + 1908 + info = NULL; 1909 + rcu_read_lock(); 1910 + list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 1911 + list) { 1912 + if (tmp->flags == types[i]) { 1913 + info = tmp; 1914 + break; 1915 + } 1916 + } 1917 + rcu_read_unlock(); 1918 + 1919 + if (!info) 1920 + continue; 1921 + 1922 + down_read(&info->groups_sem); 1923 + for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 1924 + if (!list_empty(&info->block_groups[c])) 1925 + slot_count++; 1926 + } 1927 + up_read(&info->groups_sem); 1928 + } 1929 1930 /* 
space_slots == 0 means they are asking for a count */ 1931 if (space_args.space_slots == 0) { 1932 space_args.total_spaces = slot_count; 1933 goto out; 1934 } 1935 + 1936 + slot_count = min_t(int, space_args.space_slots, slot_count); 1937 + 1938 alloc_size = sizeof(*dest) * slot_count; 1939 + 1940 /* we generally have at most 6 or so space infos, one for each raid 1941 * level. So, a whole page should be more than enough for everyone 1942 */ ··· 1921 dest_orig = dest; 1922 1923 /* now we have a buffer to copy into */ 1924 + for (i = 0; i < num_types; i++) { 1925 + struct btrfs_space_info *tmp; 1926 1927 + info = NULL; 1928 + rcu_read_lock(); 1929 + list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 1930 + list) { 1931 + if (tmp->flags == types[i]) { 1932 + info = tmp; 1933 + break; 1934 + } 1935 + } 1936 + rcu_read_unlock(); 1937 1938 + if (!info) 1939 + continue; 1940 + down_read(&info->groups_sem); 1941 + for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 1942 + if (!list_empty(&info->block_groups[c])) { 1943 + get_block_group_info(&info->block_groups[c], 1944 + &space); 1945 + memcpy(dest, &space, sizeof(space)); 1946 + dest++; 1947 + space_args.total_spaces++; 1948 + } 1949 + } 1950 + up_read(&info->groups_sem); 1951 } 1952 1953 user_dest = (struct btrfs_ioctl_space_info *) 1954 (arg + sizeof(struct btrfs_ioctl_space_args)); ··· 1984 return 0; 1985 } 1986 1987 + static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) 1988 + { 1989 + struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 1990 + struct btrfs_trans_handle *trans; 1991 + u64 transid; 1992 + 1993 + trans = btrfs_start_transaction(root, 0); 1994 + transid = trans->transid; 1995 + btrfs_commit_transaction_async(trans, root, 0); 1996 + 1997 + if (argp) 1998 + if (copy_to_user(argp, &transid, sizeof(transid))) 1999 + return -EFAULT; 2000 + return 0; 2001 + } 2002 + 2003 + static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) 2004 + { 2005 + struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 2006 + u64 transid; 2007 + 2008 + if (argp) { 2009 + if (copy_from_user(&transid, argp, sizeof(transid))) 2010 + return -EFAULT; 2011 + } else { 2012 + transid = 0; /* current trans */ 2013 + } 2014 + return btrfs_wait_for_commit(root, transid); 2015 + } 2016 + 2017 long btrfs_ioctl(struct file *file, unsigned int 2018 cmd, unsigned long arg) 2019 { ··· 1998 case FS_IOC_GETVERSION: 1999 return btrfs_ioctl_getversion(file, argp); 2000 case BTRFS_IOC_SNAP_CREATE: 2001 + return btrfs_ioctl_snap_create(file, argp, 0, 0); 2002 + case BTRFS_IOC_SNAP_CREATE_ASYNC: 2003 + return btrfs_ioctl_snap_create(file, argp, 0, 1); 2004 case BTRFS_IOC_SUBVOL_CREATE: 2005 + return btrfs_ioctl_snap_create(file, argp, 1, 0); 2006 case BTRFS_IOC_SNAP_DESTROY: 2007 return btrfs_ioctl_snap_destroy(file, argp); 2008 case BTRFS_IOC_DEFAULT_SUBVOL: ··· 2034 case BTRFS_IOC_SYNC: 2035 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2036 return 0; 2037 + case BTRFS_IOC_START_SYNC: 2038 + return btrfs_ioctl_start_sync(file, argp); 2039 + case BTRFS_IOC_WAIT_SYNC: 2040 + return btrfs_ioctl_wait_sync(file, argp); 2041 } 2042 2043 return -ENOTTY;
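
The permission rework in btrfs_ioctl_snap_destroy() above lets an unprivileged user delete a subvolume when the filesystem is mounted with -o user_subvol_rm_allowed, the ioctl is issued on the directory containing the subvolume, and the caller has write and exec access to the subvolume root (roughly the checks rmdir(2) would make). A minimal userspace sketch of that call path, assuming the btrfs ioctl ABI (struct btrfs_ioctl_vol_args, BTRFS_IOC_SNAP_DESTROY) is available from a copy of fs/btrfs/ioctl.h and using placeholder paths:

/* snap_destroy.c - illustrative sketch, not part of the patch */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "ioctl.h"        /* btrfs ioctl definitions, copied from fs/btrfs/ioctl.h */

int main(void)
{
        struct btrfs_ioctl_vol_args args;
        /* open the directory that contains the subvolume, not the subvolume itself */
        int dirfd = open("/mnt/btrfs", O_RDONLY | O_DIRECTORY);

        if (dirfd < 0)
                return 1;
        memset(&args, 0, sizeof(args));
        strncpy(args.name, "scratch", BTRFS_PATH_NAME_MAX);
        /* needs CAP_SYS_ADMIN unless mounted with -o user_subvol_rm_allowed */
        if (ioctl(dirfd, BTRFS_IOC_SNAP_DESTROY, &args) < 0)
                perror("BTRFS_IOC_SNAP_DESTROY");
        close(dirfd);
        return 0;
}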
+12 -1
fs/btrfs/ioctl.h
··· 22 23 #define BTRFS_IOCTL_MAGIC 0x94 24 #define BTRFS_VOL_NAME_MAX 255 25 - #define BTRFS_PATH_NAME_MAX 4087 26 27 /* this should be 4k */ 28 struct btrfs_ioctl_vol_args { 29 __s64 fd; 30 char name[BTRFS_PATH_NAME_MAX + 1]; 31 }; 32 33 #define BTRFS_INO_LOOKUP_PATH_MAX 4080 ··· 185 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) 186 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ 187 struct btrfs_ioctl_space_args) 188 #endif
··· 22 23 #define BTRFS_IOCTL_MAGIC 0x94 24 #define BTRFS_VOL_NAME_MAX 255 25 26 /* this should be 4k */ 27 + #define BTRFS_PATH_NAME_MAX 4087 28 struct btrfs_ioctl_vol_args { 29 __s64 fd; 30 char name[BTRFS_PATH_NAME_MAX + 1]; 31 + }; 32 + 33 + #define BTRFS_SNAPSHOT_NAME_MAX 4079 34 + struct btrfs_ioctl_async_vol_args { 35 + __s64 fd; 36 + __u64 transid; 37 + char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; 38 }; 39 40 #define BTRFS_INO_LOOKUP_PATH_MAX 4080 ··· 178 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) 179 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ 180 struct btrfs_ioctl_space_args) 181 + #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) 182 + #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 183 + #define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \ 184 + struct btrfs_ioctl_async_vol_args) 185 #endif
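
Taken together, BTRFS_IOC_SNAP_CREATE_ASYNC and BTRFS_IOC_WAIT_SYNC let userspace queue a snapshot without waiting for the commit: the kernel fills in the transid field of struct btrfs_ioctl_async_vol_args, and that id can later be handed to BTRFS_IOC_WAIT_SYNC (or the commit forced early with BTRFS_IOC_START_SYNC). A hedged sketch of the pairing, assuming these definitions are visible to userspace and using made-up paths:

/* async_snap.c - illustrative sketch, not part of the patch */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>
#include "ioctl.h"        /* btrfs ioctl definitions, copied from fs/btrfs/ioctl.h */

int main(void)
{
        struct btrfs_ioctl_async_vol_args args;
        __u64 transid;
        int src = open("/mnt/btrfs/data", O_RDONLY);   /* source subvolume */
        int dst = open("/mnt/btrfs", O_RDONLY);        /* directory that receives the snapshot */

        if (src < 0 || dst < 0)
                return 1;
        memset(&args, 0, sizeof(args));
        args.fd = src;
        strncpy(args.name, "data-snap", BTRFS_SNAPSHOT_NAME_MAX);

        /* returns once the snapshot is queued; args.transid names the pending commit */
        if (ioctl(dst, BTRFS_IOC_SNAP_CREATE_ASYNC, &args) < 0) {
                perror("BTRFS_IOC_SNAP_CREATE_ASYNC");
                return 1;
        }
        transid = args.transid;

        /* ... other work ...; then block until that transaction is committed to disk */
        if (ioctl(dst, BTRFS_IOC_WAIT_SYNC, &transid) < 0)
                perror("BTRFS_IOC_WAIT_SYNC");
        return 0;
}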
-2
fs/btrfs/ordered-data.c
··· 526 { 527 u64 end; 528 u64 orig_end; 529 - u64 wait_end; 530 struct btrfs_ordered_extent *ordered; 531 int found; 532 ··· 536 if (orig_end > INT_LIMIT(loff_t)) 537 orig_end = INT_LIMIT(loff_t); 538 } 539 - wait_end = orig_end; 540 again: 541 /* start IO across the range first to instantiate any delalloc 542 * extents
··· 526 { 527 u64 end; 528 u64 orig_end; 529 struct btrfs_ordered_extent *ordered; 530 int found; 531 ··· 537 if (orig_end > INT_LIMIT(loff_t)) 538 orig_end = INT_LIMIT(loff_t); 539 } 540 again: 541 /* start IO across the range first to instantiate any delalloc 542 * extents
+93 -16
fs/btrfs/relocation.c
··· 29 #include "locking.h" 30 #include "btrfs_inode.h" 31 #include "async-thread.h" 32 33 /* 34 * backref_node, mapping_node and tree_block start with this ··· 178 179 u64 search_start; 180 u64 extents_found; 181 - 182 - int block_rsv_retries; 183 184 unsigned int stage:8; 185 unsigned int create_reloc_tree:1; ··· 2132 LIST_HEAD(reloc_roots); 2133 u64 num_bytes = 0; 2134 int ret; 2135 - int retries = 0; 2136 2137 mutex_lock(&root->fs_info->trans_mutex); 2138 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; ··· 2141 if (!err) { 2142 num_bytes = rc->merging_rsv_size; 2143 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, 2144 - num_bytes, &retries); 2145 if (ret) 2146 err = ret; 2147 } ··· 2153 btrfs_end_transaction(trans, rc->extent_root); 2154 btrfs_block_rsv_release(rc->extent_root, 2155 rc->block_rsv, num_bytes); 2156 - retries = 0; 2157 goto again; 2158 } 2159 } ··· 2402 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2403 2404 trans->block_rsv = rc->block_rsv; 2405 - ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, 2406 - &rc->block_rsv_retries); 2407 if (ret) { 2408 if (ret == -EAGAIN) 2409 rc->commit_transaction = 1; 2410 return ret; 2411 } 2412 2413 - rc->block_rsv_retries = 0; 2414 return 0; 2415 } 2416 ··· 3094 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3095 ret = get_ref_objectid_v0(rc, path, extent_key, 3096 &ref_owner, NULL); 3097 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3098 level = (int)ref_owner; 3099 /* FIXME: get real generation */ ··· 3188 return ret; 3189 } 3190 3191 /* 3192 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3193 * this function scans fs tree to find blocks reference the data extent ··· 3262 int counted; 3263 int ret; 3264 3265 - path = btrfs_alloc_path(); 3266 - if (!path) 3267 - return -ENOMEM; 3268 - 3269 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3270 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3271 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3272 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3273 3274 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3275 if (IS_ERR(root)) { ··· 3611 * is no reservation in transaction handle. 3612 */ 3613 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, 3614 - rc->extent_root->nodesize * 256, 3615 - &rc->block_rsv_retries); 3616 if (ret) 3617 return ret; 3618 ··· 3623 rc->extents_found = 0; 3624 rc->nodes_relocated = 0; 3625 rc->merging_rsv_size = 0; 3626 - rc->block_rsv_retries = 0; 3627 3628 rc->create_reloc_tree = 1; 3629 set_reloc_control(rc); ··· 3915 { 3916 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3917 struct reloc_control *rc; 3918 int ret; 3919 int rw = 0; 3920 int err = 0; ··· 3937 goto out; 3938 } 3939 rw = 1; 3940 } 3941 3942 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); ··· 4220 btrfs_add_ordered_sum(inode, ordered, sums); 4221 } 4222 btrfs_put_ordered_extent(ordered); 4223 - return 0; 4224 } 4225 4226 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
··· 29 #include "locking.h" 30 #include "btrfs_inode.h" 31 #include "async-thread.h" 32 + #include "free-space-cache.h" 33 34 /* 35 * backref_node, mapping_node and tree_block start with this ··· 177 178 u64 search_start; 179 u64 extents_found; 180 181 unsigned int stage:8; 182 unsigned int create_reloc_tree:1; ··· 2133 LIST_HEAD(reloc_roots); 2134 u64 num_bytes = 0; 2135 int ret; 2136 2137 mutex_lock(&root->fs_info->trans_mutex); 2138 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; ··· 2143 if (!err) { 2144 num_bytes = rc->merging_rsv_size; 2145 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, 2146 + num_bytes); 2147 if (ret) 2148 err = ret; 2149 } ··· 2155 btrfs_end_transaction(trans, rc->extent_root); 2156 btrfs_block_rsv_release(rc->extent_root, 2157 rc->block_rsv, num_bytes); 2158 goto again; 2159 } 2160 } ··· 2405 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2406 2407 trans->block_rsv = rc->block_rsv; 2408 + ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); 2409 if (ret) { 2410 if (ret == -EAGAIN) 2411 rc->commit_transaction = 1; 2412 return ret; 2413 } 2414 2415 return 0; 2416 } 2417 ··· 3099 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3100 ret = get_ref_objectid_v0(rc, path, extent_key, 3101 &ref_owner, NULL); 3102 + if (ret < 0) 3103 + return ret; 3104 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3105 level = (int)ref_owner; 3106 /* FIXME: get real generation */ ··· 3191 return ret; 3192 } 3193 3194 + static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3195 + struct inode *inode, u64 ino) 3196 + { 3197 + struct btrfs_key key; 3198 + struct btrfs_path *path; 3199 + struct btrfs_root *root = fs_info->tree_root; 3200 + struct btrfs_trans_handle *trans; 3201 + unsigned long nr; 3202 + int ret = 0; 3203 + 3204 + if (inode) 3205 + goto truncate; 3206 + 3207 + key.objectid = ino; 3208 + key.type = BTRFS_INODE_ITEM_KEY; 3209 + key.offset = 0; 3210 + 3211 + inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3212 + if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { 3213 + if (inode && !IS_ERR(inode)) 3214 + iput(inode); 3215 + return -ENOENT; 3216 + } 3217 + 3218 + truncate: 3219 + path = btrfs_alloc_path(); 3220 + if (!path) { 3221 + ret = -ENOMEM; 3222 + goto out; 3223 + } 3224 + 3225 + trans = btrfs_join_transaction(root, 0); 3226 + if (IS_ERR(trans)) { 3227 + btrfs_free_path(path); 3228 + goto out; 3229 + } 3230 + 3231 + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 3232 + 3233 + btrfs_free_path(path); 3234 + nr = trans->blocks_used; 3235 + btrfs_end_transaction(trans, root); 3236 + btrfs_btree_balance_dirty(root, nr); 3237 + out: 3238 + iput(inode); 3239 + return ret; 3240 + } 3241 + 3242 /* 3243 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3244 * this function scans fs tree to find blocks reference the data extent ··· 3217 int counted; 3218 int ret; 3219 3220 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3221 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3222 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3223 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3224 + 3225 + /* 3226 + * This is an extent belonging to the free space cache, lets just delete 3227 + * it and redo the search. 
3228 + */ 3229 + if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3230 + ret = delete_block_group_cache(rc->extent_root->fs_info, 3231 + NULL, ref_objectid); 3232 + if (ret != -ENOENT) 3233 + return ret; 3234 + ret = 0; 3235 + } 3236 + 3237 + path = btrfs_alloc_path(); 3238 + if (!path) 3239 + return -ENOMEM; 3240 3241 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3242 if (IS_ERR(root)) { ··· 3554 * is no reservation in transaction handle. 3555 */ 3556 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, 3557 + rc->extent_root->nodesize * 256); 3558 if (ret) 3559 return ret; 3560 ··· 3567 rc->extents_found = 0; 3568 rc->nodes_relocated = 0; 3569 rc->merging_rsv_size = 0; 3570 3571 rc->create_reloc_tree = 1; 3572 set_reloc_control(rc); ··· 3860 { 3861 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3862 struct reloc_control *rc; 3863 + struct inode *inode; 3864 + struct btrfs_path *path; 3865 int ret; 3866 int rw = 0; 3867 int err = 0; ··· 3880 goto out; 3881 } 3882 rw = 1; 3883 + } 3884 + 3885 + path = btrfs_alloc_path(); 3886 + if (!path) { 3887 + err = -ENOMEM; 3888 + goto out; 3889 + } 3890 + 3891 + inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, 3892 + path); 3893 + btrfs_free_path(path); 3894 + 3895 + if (!IS_ERR(inode)) 3896 + ret = delete_block_group_cache(fs_info, inode, 0); 3897 + else 3898 + ret = PTR_ERR(inode); 3899 + 3900 + if (ret && ret != -ENOENT) { 3901 + err = ret; 3902 + goto out; 3903 } 3904 3905 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); ··· 4143 btrfs_add_ordered_sum(inode, ordered, sums); 4144 } 4145 btrfs_put_ordered_extent(ordered); 4146 + return ret; 4147 } 4148 4149 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
-2
fs/btrfs/root-tree.c
··· 181 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) 182 { 183 struct btrfs_root *dead_root; 184 - struct btrfs_item *item; 185 struct btrfs_root_item *ri; 186 struct btrfs_key key; 187 struct btrfs_key found_key; ··· 213 nritems = btrfs_header_nritems(leaf); 214 slot = path->slots[0]; 215 } 216 - item = btrfs_item_nr(leaf, slot); 217 btrfs_item_key_to_cpu(leaf, &key, slot); 218 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) 219 goto next;
··· 181 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) 182 { 183 struct btrfs_root *dead_root; 184 struct btrfs_root_item *ri; 185 struct btrfs_key key; 186 struct btrfs_key found_key; ··· 214 nritems = btrfs_header_nritems(leaf); 215 slot = path->slots[0]; 216 } 217 btrfs_item_key_to_cpu(leaf, &key, slot); 218 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) 219 goto next;
+30 -12
fs/btrfs/super.c
··· 61 62 ret = close_ctree(root); 63 sb->s_fs_info = NULL; 64 } 65 66 enum { ··· 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 72 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 73 - Opt_discard, Opt_err, 74 }; 75 76 static match_table_t tokens = { ··· 95 {Opt_flushoncommit, "flushoncommit"}, 96 {Opt_ratio, "metadata_ratio=%d"}, 97 {Opt_discard, "discard"}, 98 {Opt_err, NULL}, 99 }; 100 ··· 240 break; 241 case Opt_discard: 242 btrfs_set_opt(info->mount_opt, DISCARD); 243 break; 244 case Opt_err: 245 printk(KERN_INFO "btrfs: unrecognized mount option " ··· 396 find_root: 397 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 398 if (IS_ERR(new_root)) 399 - return ERR_PTR(PTR_ERR(new_root)); 400 401 if (btrfs_root_refs(&new_root->root_item) == 0) 402 return ERR_PTR(-ENOENT); ··· 452 { 453 struct inode *inode; 454 struct dentry *root_dentry; 455 - struct btrfs_super_block *disk_super; 456 struct btrfs_root *tree_root; 457 struct btrfs_key key; 458 int err; ··· 473 return PTR_ERR(tree_root); 474 } 475 sb->s_fs_info = tree_root; 476 - disk_super = &tree_root->fs_info->super_copy; 477 478 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 479 key.type = BTRFS_INODE_ITEM_KEY; ··· 585 char *subvol_name = NULL; 586 u64 subvol_objectid = 0; 587 int error = 0; 588 - int found = 0; 589 590 if (!(flags & MS_RDONLY)) 591 mode |= FMODE_WRITE; ··· 620 goto error_close_devices; 621 } 622 623 - found = 1; 624 btrfs_close_devices(fs_devices); 625 } else { 626 char b[BDEVNAME_SIZE]; ··· 641 if (IS_ERR(root)) { 642 error = PTR_ERR(root); 643 deactivate_locked_super(s); 644 - goto error; 645 } 646 /* if they gave us a subvolume name bind mount into that */ 647 if (strcmp(subvol_name, ".")) { ··· 655 deactivate_locked_super(s); 656 error = PTR_ERR(new_root); 657 dput(root); 658 - goto error_close_devices; 659 } 660 if (!new_root->d_inode) { 661 dput(root); 662 dput(new_root); 663 deactivate_locked_super(s); 664 error = -ENXIO; 665 - goto error_close_devices; 666 } 667 dput(root); 668 root = new_root; ··· 677 btrfs_close_devices(fs_devices); 678 error_free_subvol_name: 679 kfree(subvol_name); 680 - error: 681 return ERR_PTR(error); 682 } 683 ··· 724 struct list_head *head = &root->fs_info->space_info; 725 struct btrfs_space_info *found; 726 u64 total_used = 0; 727 int bits = dentry->d_sb->s_blocksize_bits; 728 __be32 *fsid = (__be32 *)root->fs_info->fsid; 729 730 rcu_read_lock(); 731 - list_for_each_entry_rcu(found, head, list) 732 total_used += found->disk_used; 733 rcu_read_unlock(); 734 735 buf->f_namelen = BTRFS_NAME_LEN; 736 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 737 buf->f_bfree = buf->f_blocks - (total_used >> bits); 738 - buf->f_bavail = buf->f_bfree; 739 buf->f_bsize = dentry->d_sb->s_blocksize; 740 buf->f_type = BTRFS_SUPER_MAGIC; 741
··· 61 62 ret = close_ctree(root); 63 sb->s_fs_info = NULL; 64 + 65 + (void)ret; /* FIXME: need to fix VFS to return error? */ 66 } 67 68 enum { ··· 68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 71 + Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, 72 + Opt_user_subvol_rm_allowed, 73 }; 74 75 static match_table_t tokens = { ··· 92 {Opt_flushoncommit, "flushoncommit"}, 93 {Opt_ratio, "metadata_ratio=%d"}, 94 {Opt_discard, "discard"}, 95 + {Opt_space_cache, "space_cache"}, 96 + {Opt_clear_cache, "clear_cache"}, 97 + {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 98 {Opt_err, NULL}, 99 }; 100 ··· 234 break; 235 case Opt_discard: 236 btrfs_set_opt(info->mount_opt, DISCARD); 237 + break; 238 + case Opt_space_cache: 239 + printk(KERN_INFO "btrfs: enabling disk space caching\n"); 240 + btrfs_set_opt(info->mount_opt, SPACE_CACHE); 241 + case Opt_clear_cache: 242 + printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 243 + btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 244 + break; 245 + case Opt_user_subvol_rm_allowed: 246 + btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 247 break; 248 case Opt_err: 249 printk(KERN_INFO "btrfs: unrecognized mount option " ··· 380 find_root: 381 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 382 if (IS_ERR(new_root)) 383 + return ERR_CAST(new_root); 384 385 if (btrfs_root_refs(&new_root->root_item) == 0) 386 return ERR_PTR(-ENOENT); ··· 436 { 437 struct inode *inode; 438 struct dentry *root_dentry; 439 struct btrfs_root *tree_root; 440 struct btrfs_key key; 441 int err; ··· 458 return PTR_ERR(tree_root); 459 } 460 sb->s_fs_info = tree_root; 461 462 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 463 key.type = BTRFS_INODE_ITEM_KEY; ··· 571 char *subvol_name = NULL; 572 u64 subvol_objectid = 0; 573 int error = 0; 574 575 if (!(flags & MS_RDONLY)) 576 mode |= FMODE_WRITE; ··· 607 goto error_close_devices; 608 } 609 610 btrfs_close_devices(fs_devices); 611 } else { 612 char b[BDEVNAME_SIZE]; ··· 629 if (IS_ERR(root)) { 630 error = PTR_ERR(root); 631 deactivate_locked_super(s); 632 + goto error_free_subvol_name; 633 } 634 /* if they gave us a subvolume name bind mount into that */ 635 if (strcmp(subvol_name, ".")) { ··· 643 deactivate_locked_super(s); 644 error = PTR_ERR(new_root); 645 dput(root); 646 + goto error_free_subvol_name; 647 } 648 if (!new_root->d_inode) { 649 dput(root); 650 dput(new_root); 651 deactivate_locked_super(s); 652 error = -ENXIO; 653 + goto error_free_subvol_name; 654 } 655 dput(root); 656 root = new_root; ··· 665 btrfs_close_devices(fs_devices); 666 error_free_subvol_name: 667 kfree(subvol_name); 668 return ERR_PTR(error); 669 } 670 ··· 713 struct list_head *head = &root->fs_info->space_info; 714 struct btrfs_space_info *found; 715 u64 total_used = 0; 716 + u64 total_used_data = 0; 717 int bits = dentry->d_sb->s_blocksize_bits; 718 __be32 *fsid = (__be32 *)root->fs_info->fsid; 719 720 rcu_read_lock(); 721 + list_for_each_entry_rcu(found, head, list) { 722 + if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | 723 + BTRFS_BLOCK_GROUP_SYSTEM)) 724 + total_used_data += found->disk_total; 725 + else 726 + total_used_data += found->disk_used; 727 total_used += found->disk_used; 728 + } 729 rcu_read_unlock(); 730 731 buf->f_namelen = BTRFS_NAME_LEN; 732 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 733 buf->f_bfree = buf->f_blocks - 
(total_used >> bits); 734 + buf->f_bavail = buf->f_blocks - (total_used_data >> bits); 735 buf->f_bsize = dentry->d_sb->s_blocksize; 736 buf->f_type = BTRFS_SUPER_MAGIC; 737
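
The option parsing added above exposes three new mount options: space_cache (persist the free space cache to disk), clear_cache (discard the cached data and rebuild it), and user_subvol_rm_allowed (enable the unprivileged BTRFS_IOC_SNAP_DESTROY path from the ioctl.c hunk). A small sketch of selecting them from a program, with a hypothetical device and mount point; the same comma-separated strings work with mount -o on the command line:

/* mount_opts.c - illustrative sketch, not part of the patch */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* device and target are placeholders */
        if (mount("/dev/sdb1", "/mnt/btrfs", "btrfs", 0,
                  "space_cache,user_subvol_rm_allowed") < 0)
                perror("mount");
        return 0;
}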
+210 -24
fs/btrfs/transaction.c
··· 163 TRANS_START, 164 TRANS_JOIN, 165 TRANS_USERSPACE, 166 }; 167 168 static int may_wait_transaction(struct btrfs_root *root, int type) ··· 180 { 181 struct btrfs_trans_handle *h; 182 struct btrfs_transaction *cur_trans; 183 - int retries = 0; 184 int ret; 185 again: 186 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 187 if (!h) 188 return ERR_PTR(-ENOMEM); 189 190 - mutex_lock(&root->fs_info->trans_mutex); 191 if (may_wait_transaction(root, type)) 192 wait_current_trans(root); 193 ··· 196 197 cur_trans = root->fs_info->running_transaction; 198 cur_trans->use_count++; 199 - mutex_unlock(&root->fs_info->trans_mutex); 200 201 h->transid = cur_trans->transid; 202 h->transaction = cur_trans; ··· 214 } 215 216 if (num_items > 0) { 217 - ret = btrfs_trans_reserve_metadata(h, root, num_items, 218 - &retries); 219 if (ret == -EAGAIN) { 220 btrfs_commit_transaction(h, root); 221 goto again; ··· 225 } 226 } 227 228 - mutex_lock(&root->fs_info->trans_mutex); 229 record_root_in_trans(h, root); 230 - mutex_unlock(&root->fs_info->trans_mutex); 231 232 if (!current->journal_info && type != TRANS_USERSPACE) 233 current->journal_info = h; ··· 245 int num_blocks) 246 { 247 return start_transaction(root, 0, TRANS_JOIN); 248 } 249 250 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, ··· 277 mutex_unlock(&root->fs_info->trans_mutex); 278 finish_wait(&commit->commit_wait, &wait); 279 return 0; 280 } 281 282 #if 0 ··· 409 } 410 411 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 412 - struct btrfs_root *root, int throttle) 413 { 414 struct btrfs_transaction *cur_trans = trans->transaction; 415 struct btrfs_fs_info *info = root->fs_info; ··· 437 438 btrfs_trans_release_metadata(trans, root); 439 440 - if (!root->fs_info->open_ioctl_trans && 441 should_end_transaction(trans, root)) 442 trans->transaction->blocked = 1; 443 444 - if (cur_trans->blocked && !cur_trans->in_commit) { 445 if (throttle) 446 return btrfs_commit_transaction(trans, root); 447 else 448 wake_up_process(info->transaction_kthread); 449 } 450 451 - mutex_lock(&info->trans_mutex); 452 WARN_ON(cur_trans != info->running_transaction); 453 WARN_ON(cur_trans->num_writers < 1); 454 cur_trans->num_writers--; 455 456 if (waitqueue_active(&cur_trans->writer_wait)) 457 wake_up(&cur_trans->writer_wait); 458 put_transaction(cur_trans); 459 - mutex_unlock(&info->trans_mutex); 460 461 if (current->journal_info == trans) 462 current->journal_info = NULL; ··· 475 int btrfs_end_transaction(struct btrfs_trans_handle *trans, 476 struct btrfs_root *root) 477 { 478 - return __btrfs_end_transaction(trans, root, 0); 479 } 480 481 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 482 struct btrfs_root *root) 483 { 484 - return __btrfs_end_transaction(trans, root, 1); 485 } 486 487 /* ··· 906 struct extent_buffer *tmp; 907 struct extent_buffer *old; 908 int ret; 909 - int retries = 0; 910 u64 to_reserve = 0; 911 u64 index = 0; 912 u64 objectid; ··· 927 928 if (to_reserve > 0) { 929 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, 930 - to_reserve, &retries); 931 if (ret) { 932 pending->error = ret; 933 goto fail; ··· 1035 super->root = root_item->bytenr; 1036 super->generation = root_item->generation; 1037 super->root_level = root_item->level; 1038 } 1039 1040 int btrfs_transaction_in_commit(struct btrfs_fs_info *info) ··· 1059 return ret; 1060 } 1061 1062 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1063 struct btrfs_root *root) 1064 { 1065 unsigned long 
joined = 0; 1066 - unsigned long timeout = 1; 1067 struct btrfs_transaction *cur_trans; 1068 struct btrfs_transaction *prev_trans = NULL; 1069 DEFINE_WAIT(wait); ··· 1226 1227 trans->transaction->in_commit = 1; 1228 trans->transaction->blocked = 1; 1229 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1230 prev_trans = list_entry(cur_trans->list.prev, 1231 struct btrfs_transaction, list); ··· 1252 snap_pending = 1; 1253 1254 WARN_ON(cur_trans != trans->transaction); 1255 - if (cur_trans->num_writers > 1) 1256 - timeout = MAX_SCHEDULE_TIMEOUT; 1257 - else if (should_grow) 1258 - timeout = 1; 1259 - 1260 mutex_unlock(&root->fs_info->trans_mutex); 1261 1262 if (flush_on_commit || snap_pending) { ··· 1273 TASK_UNINTERRUPTIBLE); 1274 1275 smp_mb(); 1276 - if (cur_trans->num_writers > 1 || should_grow) 1277 - schedule_timeout(timeout); 1278 1279 mutex_lock(&root->fs_info->trans_mutex); 1280 finish_wait(&cur_trans->writer_wait, &wait);
··· 163 TRANS_START, 164 TRANS_JOIN, 165 TRANS_USERSPACE, 166 + TRANS_JOIN_NOLOCK, 167 }; 168 169 static int may_wait_transaction(struct btrfs_root *root, int type) ··· 179 { 180 struct btrfs_trans_handle *h; 181 struct btrfs_transaction *cur_trans; 182 int ret; 183 again: 184 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 185 if (!h) 186 return ERR_PTR(-ENOMEM); 187 188 + if (type != TRANS_JOIN_NOLOCK) 189 + mutex_lock(&root->fs_info->trans_mutex); 190 if (may_wait_transaction(root, type)) 191 wait_current_trans(root); 192 ··· 195 196 cur_trans = root->fs_info->running_transaction; 197 cur_trans->use_count++; 198 + if (type != TRANS_JOIN_NOLOCK) 199 + mutex_unlock(&root->fs_info->trans_mutex); 200 201 h->transid = cur_trans->transid; 202 h->transaction = cur_trans; ··· 212 } 213 214 if (num_items > 0) { 215 + ret = btrfs_trans_reserve_metadata(h, root, num_items); 216 if (ret == -EAGAIN) { 217 btrfs_commit_transaction(h, root); 218 goto again; ··· 224 } 225 } 226 227 + if (type != TRANS_JOIN_NOLOCK) 228 + mutex_lock(&root->fs_info->trans_mutex); 229 record_root_in_trans(h, root); 230 + if (type != TRANS_JOIN_NOLOCK) 231 + mutex_unlock(&root->fs_info->trans_mutex); 232 233 if (!current->journal_info && type != TRANS_USERSPACE) 234 current->journal_info = h; ··· 242 int num_blocks) 243 { 244 return start_transaction(root, 0, TRANS_JOIN); 245 + } 246 + 247 + struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 248 + int num_blocks) 249 + { 250 + return start_transaction(root, 0, TRANS_JOIN_NOLOCK); 251 } 252 253 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, ··· 268 mutex_unlock(&root->fs_info->trans_mutex); 269 finish_wait(&commit->commit_wait, &wait); 270 return 0; 271 + } 272 + 273 + int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) 274 + { 275 + struct btrfs_transaction *cur_trans = NULL, *t; 276 + int ret; 277 + 278 + mutex_lock(&root->fs_info->trans_mutex); 279 + 280 + ret = 0; 281 + if (transid) { 282 + if (transid <= root->fs_info->last_trans_committed) 283 + goto out_unlock; 284 + 285 + /* find specified transaction */ 286 + list_for_each_entry(t, &root->fs_info->trans_list, list) { 287 + if (t->transid == transid) { 288 + cur_trans = t; 289 + break; 290 + } 291 + if (t->transid > transid) 292 + break; 293 + } 294 + ret = -EINVAL; 295 + if (!cur_trans) 296 + goto out_unlock; /* bad transid */ 297 + } else { 298 + /* find newest transaction that is committing | committed */ 299 + list_for_each_entry_reverse(t, &root->fs_info->trans_list, 300 + list) { 301 + if (t->in_commit) { 302 + if (t->commit_done) 303 + goto out_unlock; 304 + cur_trans = t; 305 + break; 306 + } 307 + } 308 + if (!cur_trans) 309 + goto out_unlock; /* nothing committing|committed */ 310 + } 311 + 312 + cur_trans->use_count++; 313 + mutex_unlock(&root->fs_info->trans_mutex); 314 + 315 + wait_for_commit(root, cur_trans); 316 + 317 + mutex_lock(&root->fs_info->trans_mutex); 318 + put_transaction(cur_trans); 319 + ret = 0; 320 + out_unlock: 321 + mutex_unlock(&root->fs_info->trans_mutex); 322 + return ret; 323 } 324 325 #if 0 ··· 348 } 349 350 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 351 + struct btrfs_root *root, int throttle, int lock) 352 { 353 struct btrfs_transaction *cur_trans = trans->transaction; 354 struct btrfs_fs_info *info = root->fs_info; ··· 376 377 btrfs_trans_release_metadata(trans, root); 378 379 + if (lock && !root->fs_info->open_ioctl_trans && 380 should_end_transaction(trans, root)) 
381 trans->transaction->blocked = 1; 382 383 + if (lock && cur_trans->blocked && !cur_trans->in_commit) { 384 if (throttle) 385 return btrfs_commit_transaction(trans, root); 386 else 387 wake_up_process(info->transaction_kthread); 388 } 389 390 + if (lock) 391 + mutex_lock(&info->trans_mutex); 392 WARN_ON(cur_trans != info->running_transaction); 393 WARN_ON(cur_trans->num_writers < 1); 394 cur_trans->num_writers--; 395 396 + smp_mb(); 397 if (waitqueue_active(&cur_trans->writer_wait)) 398 wake_up(&cur_trans->writer_wait); 399 put_transaction(cur_trans); 400 + if (lock) 401 + mutex_unlock(&info->trans_mutex); 402 403 if (current->journal_info == trans) 404 current->journal_info = NULL; ··· 411 int btrfs_end_transaction(struct btrfs_trans_handle *trans, 412 struct btrfs_root *root) 413 { 414 + return __btrfs_end_transaction(trans, root, 0, 1); 415 } 416 417 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 418 struct btrfs_root *root) 419 { 420 + return __btrfs_end_transaction(trans, root, 1, 1); 421 + } 422 + 423 + int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, 424 + struct btrfs_root *root) 425 + { 426 + return __btrfs_end_transaction(trans, root, 0, 0); 427 } 428 429 /* ··· 836 struct extent_buffer *tmp; 837 struct extent_buffer *old; 838 int ret; 839 u64 to_reserve = 0; 840 u64 index = 0; 841 u64 objectid; ··· 858 859 if (to_reserve > 0) { 860 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, 861 + to_reserve); 862 if (ret) { 863 pending->error = ret; 864 goto fail; ··· 966 super->root = root_item->bytenr; 967 super->generation = root_item->generation; 968 super->root_level = root_item->level; 969 + if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) 970 + super->cache_generation = root_item->generation; 971 } 972 973 int btrfs_transaction_in_commit(struct btrfs_fs_info *info) ··· 988 return ret; 989 } 990 991 + /* 992 + * wait for the current transaction commit to start and block subsequent 993 + * transaction joins 994 + */ 995 + static void wait_current_trans_commit_start(struct btrfs_root *root, 996 + struct btrfs_transaction *trans) 997 + { 998 + DEFINE_WAIT(wait); 999 + 1000 + if (trans->in_commit) 1001 + return; 1002 + 1003 + while (1) { 1004 + prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, 1005 + TASK_UNINTERRUPTIBLE); 1006 + if (trans->in_commit) { 1007 + finish_wait(&root->fs_info->transaction_blocked_wait, 1008 + &wait); 1009 + break; 1010 + } 1011 + mutex_unlock(&root->fs_info->trans_mutex); 1012 + schedule(); 1013 + mutex_lock(&root->fs_info->trans_mutex); 1014 + finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1015 + } 1016 + } 1017 + 1018 + /* 1019 + * wait for the current transaction to start and then become unblocked. 1020 + * caller holds ref. 
1021 + */ 1022 + static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, 1023 + struct btrfs_transaction *trans) 1024 + { 1025 + DEFINE_WAIT(wait); 1026 + 1027 + if (trans->commit_done || (trans->in_commit && !trans->blocked)) 1028 + return; 1029 + 1030 + while (1) { 1031 + prepare_to_wait(&root->fs_info->transaction_wait, &wait, 1032 + TASK_UNINTERRUPTIBLE); 1033 + if (trans->commit_done || 1034 + (trans->in_commit && !trans->blocked)) { 1035 + finish_wait(&root->fs_info->transaction_wait, 1036 + &wait); 1037 + break; 1038 + } 1039 + mutex_unlock(&root->fs_info->trans_mutex); 1040 + schedule(); 1041 + mutex_lock(&root->fs_info->trans_mutex); 1042 + finish_wait(&root->fs_info->transaction_wait, 1043 + &wait); 1044 + } 1045 + } 1046 + 1047 + /* 1048 + * commit transactions asynchronously. once btrfs_commit_transaction_async 1049 + * returns, any subsequent transaction will not be allowed to join. 1050 + */ 1051 + struct btrfs_async_commit { 1052 + struct btrfs_trans_handle *newtrans; 1053 + struct btrfs_root *root; 1054 + struct delayed_work work; 1055 + }; 1056 + 1057 + static void do_async_commit(struct work_struct *work) 1058 + { 1059 + struct btrfs_async_commit *ac = 1060 + container_of(work, struct btrfs_async_commit, work.work); 1061 + 1062 + btrfs_commit_transaction(ac->newtrans, ac->root); 1063 + kfree(ac); 1064 + } 1065 + 1066 + int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, 1067 + struct btrfs_root *root, 1068 + int wait_for_unblock) 1069 + { 1070 + struct btrfs_async_commit *ac; 1071 + struct btrfs_transaction *cur_trans; 1072 + 1073 + ac = kmalloc(sizeof(*ac), GFP_NOFS); 1074 + BUG_ON(!ac); 1075 + 1076 + INIT_DELAYED_WORK(&ac->work, do_async_commit); 1077 + ac->root = root; 1078 + ac->newtrans = btrfs_join_transaction(root, 0); 1079 + 1080 + /* take transaction reference */ 1081 + mutex_lock(&root->fs_info->trans_mutex); 1082 + cur_trans = trans->transaction; 1083 + cur_trans->use_count++; 1084 + mutex_unlock(&root->fs_info->trans_mutex); 1085 + 1086 + btrfs_end_transaction(trans, root); 1087 + schedule_delayed_work(&ac->work, 0); 1088 + 1089 + /* wait for transaction to start and unblock */ 1090 + mutex_lock(&root->fs_info->trans_mutex); 1091 + if (wait_for_unblock) 1092 + wait_current_trans_commit_start_and_unblock(root, cur_trans); 1093 + else 1094 + wait_current_trans_commit_start(root, cur_trans); 1095 + put_transaction(cur_trans); 1096 + mutex_unlock(&root->fs_info->trans_mutex); 1097 + 1098 + return 0; 1099 + } 1100 + 1101 + /* 1102 + * btrfs_transaction state sequence: 1103 + * in_commit = 0, blocked = 0 (initial) 1104 + * in_commit = 1, blocked = 1 1105 + * blocked = 0 1106 + * commit_done = 1 1107 + */ 1108 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1109 struct btrfs_root *root) 1110 { 1111 unsigned long joined = 0; 1112 struct btrfs_transaction *cur_trans; 1113 struct btrfs_transaction *prev_trans = NULL; 1114 DEFINE_WAIT(wait); ··· 1039 1040 trans->transaction->in_commit = 1; 1041 trans->transaction->blocked = 1; 1042 + wake_up(&root->fs_info->transaction_blocked_wait); 1043 + 1044 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1045 prev_trans = list_entry(cur_trans->list.prev, 1046 struct btrfs_transaction, list); ··· 1063 snap_pending = 1; 1064 1065 WARN_ON(cur_trans != trans->transaction); 1066 mutex_unlock(&root->fs_info->trans_mutex); 1067 1068 if (flush_on_commit || snap_pending) { ··· 1089 TASK_UNINTERRUPTIBLE); 1090 1091 smp_mb(); 1092 + if (cur_trans->num_writers > 1) 1093 + 
schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1094 + else if (should_grow) 1095 + schedule_timeout(1); 1096 1097 mutex_lock(&root->fs_info->trans_mutex); 1098 finish_wait(&cur_trans->writer_wait, &wait);
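The async commit and wait primitives added above are easiest to read from the caller's side. The sketch below is illustrative only and is not part of the patch: the function name and the error handling are made up, and only btrfs_join_transaction, btrfs_commit_transaction_async and btrfs_wait_for_commit come from the code above.

#include <linux/err.h>
#include "ctree.h"
#include "transaction.h"

/*
 * Illustrative only: kick off a commit without blocking on it, then wait
 * for that specific transaction to finish later.
 */
static int example_commit_and_wait(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	u64 transid;
	int ret;

	trans = btrfs_join_transaction(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	transid = trans->transid;

	/* starts the commit and returns before it completes */
	ret = btrfs_commit_transaction_async(trans, root, 0);
	if (ret)
		return ret;

	/* ... other work can run here while the commit proceeds ... */

	/* block until the transaction with that transid has committed */
	return btrfs_wait_for_commit(root, transid);
}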
+8
fs/btrfs/transaction.h
··· 87 88 int btrfs_end_transaction(struct btrfs_trans_handle *trans, 89 struct btrfs_root *root); 90 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 91 int num_items); 92 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 93 int num_blocks); 94 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 95 int num_blocks); 96 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 97 struct btrfs_root *root); 98 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, ··· 109 int btrfs_clean_old_snapshots(struct btrfs_root *root); 110 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 111 struct btrfs_root *root); 112 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 113 struct btrfs_root *root); 114 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
··· 87 88 int btrfs_end_transaction(struct btrfs_trans_handle *trans, 89 struct btrfs_root *root); 90 + int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, 91 + struct btrfs_root *root); 92 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 93 int num_items); 94 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 95 int num_blocks); 96 + struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 97 + int num_blocks); 98 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 99 int num_blocks); 100 + int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 101 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 102 struct btrfs_root *root); 103 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, ··· 104 int btrfs_clean_old_snapshots(struct btrfs_root *root); 105 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 106 struct btrfs_root *root); 107 + int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, 108 + struct btrfs_root *root, 109 + int wait_for_unblock); 110 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 111 struct btrfs_root *root); 112 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
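For the _nolock variants declared here, a hedged sketch of the intended calling pattern (the function name and the caller's locking context are assumptions for illustration; the only point is that neither call takes trans_mutex):

/*
 * Illustrative only: a caller that must not, or need not, take trans_mutex
 * joins the running transaction and ends it with the _nolock helpers.
 */
static int example_nolock_work(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	trans = btrfs_join_transaction_nolock(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* ... update items against the currently running transaction ... */

	return btrfs_end_transaction_nolock(trans, root);
}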
-2
fs/btrfs/tree-defrag.c
··· 36 int ret = 0; 37 int wret; 38 int level; 39 - int orig_level; 40 int is_extent = 0; 41 int next_key_ret = 0; 42 u64 last_ret = 0; ··· 63 return -ENOMEM; 64 65 level = btrfs_header_level(root->node); 66 - orig_level = level; 67 68 if (level == 0) 69 goto out;
··· 36 int ret = 0; 37 int wret; 38 int level; 39 int is_extent = 0; 40 int next_key_ret = 0; 41 u64 last_ret = 0; ··· 64 return -ENOMEM; 65 66 level = btrfs_header_level(root->node); 67 68 if (level == 0) 69 goto out;
+1 -16
fs/btrfs/tree-log.c
··· 786 { 787 struct inode *dir; 788 int ret; 789 - struct btrfs_key location; 790 struct btrfs_inode_ref *ref; 791 struct btrfs_dir_item *di; 792 struct inode *inode; ··· 793 int namelen; 794 unsigned long ref_ptr; 795 unsigned long ref_end; 796 - 797 - location.objectid = key->objectid; 798 - location.type = BTRFS_INODE_ITEM_KEY; 799 - location.offset = 0; 800 801 /* 802 * it is possible that we didn't log all the parent directories ··· 1578 struct btrfs_path *path; 1579 struct btrfs_root *root = wc->replay_dest; 1580 struct btrfs_key key; 1581 - u32 item_size; 1582 int level; 1583 int i; 1584 int ret; ··· 1595 nritems = btrfs_header_nritems(eb); 1596 for (i = 0; i < nritems; i++) { 1597 btrfs_item_key_to_cpu(eb, &key, i); 1598 - item_size = btrfs_item_size_nr(eb, i); 1599 1600 /* inode keys are done during the first stage */ 1601 if (key.type == BTRFS_INODE_ITEM_KEY && ··· 1661 struct walk_control *wc) 1662 { 1663 u64 root_owner; 1664 - u64 root_gen; 1665 u64 bytenr; 1666 u64 ptr_gen; 1667 struct extent_buffer *next; ··· 1690 1691 parent = path->nodes[*level]; 1692 root_owner = btrfs_header_owner(parent); 1693 - root_gen = btrfs_header_generation(parent); 1694 1695 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1696 ··· 1740 struct walk_control *wc) 1741 { 1742 u64 root_owner; 1743 - u64 root_gen; 1744 int i; 1745 int slot; 1746 int ret; ··· 1747 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 1748 slot = path->slots[i]; 1749 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 1750 - struct extent_buffer *node; 1751 - node = path->nodes[i]; 1752 path->slots[i]++; 1753 *level = i; 1754 WARN_ON(*level == 0); ··· 1759 parent = path->nodes[*level + 1]; 1760 1761 root_owner = btrfs_header_owner(parent); 1762 - root_gen = btrfs_header_generation(parent); 1763 wc->process_func(root, path->nodes[*level], wc, 1764 btrfs_header_generation(path->nodes[*level])); 1765 if (wc->free) { ··· 2260 } 2261 btrfs_end_log_trans(root); 2262 2263 - return 0; 2264 } 2265 2266 /* see comments for btrfs_del_dir_entries_in_log */ ··· 2716 struct btrfs_key max_key; 2717 struct btrfs_root *log = root->log_root; 2718 struct extent_buffer *src = NULL; 2719 - u32 size; 2720 int err = 0; 2721 int ret; 2722 int nritems; ··· 2779 break; 2780 2781 src = path->nodes[0]; 2782 - size = btrfs_item_size_nr(src, path->slots[0]); 2783 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 2784 ins_nr++; 2785 goto next_slot;
··· 786 { 787 struct inode *dir; 788 int ret; 789 struct btrfs_inode_ref *ref; 790 struct btrfs_dir_item *di; 791 struct inode *inode; ··· 794 int namelen; 795 unsigned long ref_ptr; 796 unsigned long ref_end; 797 798 /* 799 * it is possible that we didn't log all the parent directories ··· 1583 struct btrfs_path *path; 1584 struct btrfs_root *root = wc->replay_dest; 1585 struct btrfs_key key; 1586 int level; 1587 int i; 1588 int ret; ··· 1601 nritems = btrfs_header_nritems(eb); 1602 for (i = 0; i < nritems; i++) { 1603 btrfs_item_key_to_cpu(eb, &key, i); 1604 1605 /* inode keys are done during the first stage */ 1606 if (key.type == BTRFS_INODE_ITEM_KEY && ··· 1668 struct walk_control *wc) 1669 { 1670 u64 root_owner; 1671 u64 bytenr; 1672 u64 ptr_gen; 1673 struct extent_buffer *next; ··· 1698 1699 parent = path->nodes[*level]; 1700 root_owner = btrfs_header_owner(parent); 1701 1702 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1703 ··· 1749 struct walk_control *wc) 1750 { 1751 u64 root_owner; 1752 int i; 1753 int slot; 1754 int ret; ··· 1757 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 1758 slot = path->slots[i]; 1759 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 1760 path->slots[i]++; 1761 *level = i; 1762 WARN_ON(*level == 0); ··· 1771 parent = path->nodes[*level + 1]; 1772 1773 root_owner = btrfs_header_owner(parent); 1774 wc->process_func(root, path->nodes[*level], wc, 1775 btrfs_header_generation(path->nodes[*level])); 1776 if (wc->free) { ··· 2273 } 2274 btrfs_end_log_trans(root); 2275 2276 + return err; 2277 } 2278 2279 /* see comments for btrfs_del_dir_entries_in_log */ ··· 2729 struct btrfs_key max_key; 2730 struct btrfs_root *log = root->log_root; 2731 struct extent_buffer *src = NULL; 2732 int err = 0; 2733 int ret; 2734 int nritems; ··· 2793 break; 2794 2795 src = path->nodes[0]; 2796 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 2797 ins_nr++; 2798 goto next_slot;
+1 -6
fs/btrfs/volumes.c
··· 1898 u64 size_to_free; 1899 struct btrfs_path *path; 1900 struct btrfs_key key; 1901 - struct btrfs_chunk *chunk; 1902 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; 1903 struct btrfs_trans_handle *trans; 1904 struct btrfs_key found_key; ··· 1961 if (found_key.objectid != key.objectid) 1962 break; 1963 1964 - chunk = btrfs_item_ptr(path->nodes[0], 1965 - path->slots[0], 1966 - struct btrfs_chunk); 1967 /* chunk zero is special */ 1968 if (found_key.offset == 0) 1969 break; ··· 3027 } 3028 bio->bi_sector = multi->stripes[dev_nr].physical >> 9; 3029 dev = multi->stripes[dev_nr].dev; 3030 - BUG_ON(rw == WRITE && !dev->writeable); 3031 - if (dev && dev->bdev) { 3032 bio->bi_bdev = dev->bdev; 3033 if (async_submit) 3034 schedule_bio(root, dev, rw, bio);
··· 1898 u64 size_to_free; 1899 struct btrfs_path *path; 1900 struct btrfs_key key; 1901 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; 1902 struct btrfs_trans_handle *trans; 1903 struct btrfs_key found_key; ··· 1962 if (found_key.objectid != key.objectid) 1963 break; 1964 1965 /* chunk zero is special */ 1966 if (found_key.offset == 0) 1967 break; ··· 3031 } 3032 bio->bi_sector = multi->stripes[dev_nr].physical >> 9; 3033 dev = multi->stripes[dev_nr].dev; 3034 + if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { 3035 bio->bi_bdev = dev->bdev; 3036 if (async_submit) 3037 schedule_bio(root, dev, rw, bio);
-2
fs/btrfs/xattr.c
··· 178 struct inode *inode = dentry->d_inode; 179 struct btrfs_root *root = BTRFS_I(inode)->root; 180 struct btrfs_path *path; 181 - struct btrfs_item *item; 182 struct extent_buffer *leaf; 183 struct btrfs_dir_item *di; 184 int ret = 0, slot, advance; ··· 233 } 234 advance = 1; 235 236 - item = btrfs_item_nr(leaf, slot); 237 btrfs_item_key_to_cpu(leaf, &found_key, slot); 238 239 /* check to make sure this item is what we want */
··· 178 struct inode *inode = dentry->d_inode; 179 struct btrfs_root *root = BTRFS_I(inode)->root; 180 struct btrfs_path *path; 181 struct extent_buffer *leaf; 182 struct btrfs_dir_item *di; 183 int ret = 0, slot, advance; ··· 234 } 235 advance = 1; 236 237 btrfs_item_key_to_cpu(leaf, &found_key, slot); 238 239 /* check to make sure this item is what we want */
-5
fs/btrfs/zlib.c
··· 199 int nr_pages = 0; 200 struct page *in_page = NULL; 201 struct page *out_page = NULL; 202 - int out_written = 0; 203 - int in_read = 0; 204 unsigned long bytes_left; 205 206 *out_pages = 0; ··· 230 workspace->def_strm.next_out = cpage_out; 231 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 232 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); 233 - 234 - out_written = 0; 235 - in_read = 0; 236 237 while (workspace->def_strm.total_in < len) { 238 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
··· 199 int nr_pages = 0; 200 struct page *in_page = NULL; 201 struct page *out_page = NULL; 202 unsigned long bytes_left; 203 204 *out_pages = 0; ··· 232 workspace->def_strm.next_out = cpage_out; 233 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 234 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); 235 236 while (workspace->def_strm.total_in < len) { 237 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
+40 -7
fs/fs-writeback.c
··· 1081 } 1082 1083 /** 1084 - * writeback_inodes_sb - writeback dirty inodes from given super_block 1085 * @sb: the superblock 1086 * 1087 * Start writeback on some inodes on this super_block. No guarantees are made 1088 * on how many (if any) will be written, and this function does not wait 1089 - * for IO completion of submitted IO. The number of pages submitted is 1090 - * returned. 1091 */ 1092 - void writeback_inodes_sb(struct super_block *sb) 1093 { 1094 DECLARE_COMPLETION_ONSTACK(done); 1095 struct wb_writeback_work work = { 1096 .sb = sb, 1097 .sync_mode = WB_SYNC_NONE, 1098 .done = &done, 1099 }; 1100 1101 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1102 - 1103 - work.nr_pages = get_nr_dirty_pages(); 1104 - 1105 bdi_queue_work(sb->s_bdi, &work); 1106 wait_for_completion(&done); 1107 } 1108 EXPORT_SYMBOL(writeback_inodes_sb); 1109 ··· 1137 return 0; 1138 } 1139 EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1140 1141 /** 1142 * sync_inodes_sb - sync sb inode pages
··· 1081 } 1082 1083 /** 1084 + * writeback_inodes_sb_nr - writeback dirty inodes from given super_block 1085 * @sb: the superblock 1086 + * @nr: the number of pages to write 1087 * 1088 * Start writeback on some inodes on this super_block. No guarantees are made 1089 * on how many (if any) will be written, and this function does not wait 1090 + * for IO completion of submitted IO. 1091 */ 1092 + void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) 1093 { 1094 DECLARE_COMPLETION_ONSTACK(done); 1095 struct wb_writeback_work work = { 1096 .sb = sb, 1097 .sync_mode = WB_SYNC_NONE, 1098 .done = &done, 1099 + .nr_pages = nr, 1100 }; 1101 1102 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1103 bdi_queue_work(sb->s_bdi, &work); 1104 wait_for_completion(&done); 1105 + } 1106 + EXPORT_SYMBOL(writeback_inodes_sb_nr); 1107 + 1108 + /** 1109 + * writeback_inodes_sb - writeback dirty inodes from given super_block 1110 + * @sb: the superblock 1111 + * 1112 + * Start writeback on some inodes on this super_block. No guarantees are made 1113 + * on how many (if any) will be written, and this function does not wait 1114 + * for IO completion of submitted IO. 1115 + */ 1116 + void writeback_inodes_sb(struct super_block *sb) 1117 + { 1118 + return writeback_inodes_sb_nr(sb, get_nr_dirty_pages()); 1119 } 1120 EXPORT_SYMBOL(writeback_inodes_sb); 1121 ··· 1125 return 0; 1126 } 1127 EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1128 + 1129 + /** 1130 + * writeback_inodes_sb_if_idle - start writeback if none underway 1131 + * @sb: the superblock 1132 + * @nr: the number of pages to write 1133 + * 1134 + * Invoke writeback_inodes_sb if no writeback is currently underway. 1135 + * Returns 1 if writeback was started, 0 if not. 1136 + */ 1137 + int writeback_inodes_sb_nr_if_idle(struct super_block *sb, 1138 + unsigned long nr) 1139 + { 1140 + if (!writeback_in_progress(sb->s_bdi)) { 1141 + down_read(&sb->s_umount); 1142 + writeback_inodes_sb_nr(sb, nr); 1143 + up_read(&sb->s_umount); 1144 + return 1; 1145 + } else 1146 + return 0; 1147 + } 1148 + EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); 1149 1150 /** 1151 * sync_inodes_sb - sync sb inode pages
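A short sketch of how a filesystem might call the two new entry points; the example function names and the surrounding policy are assumptions, and only the writeback_inodes_sb_nr* calls come from the patch.

#include <linux/fs.h>
#include <linux/writeback.h>

/* Illustrative only: bounded flush of a superblock's dirty pages. */
static void example_flush_some_pages(struct super_block *sb, unsigned long nr)
{
	/* caller must already hold sb->s_umount (the function WARNs otherwise) */
	writeback_inodes_sb_nr(sb, nr);
}

/* Illustrative only: opportunistic flush that avoids piling onto a running one. */
static int example_flush_if_idle(struct super_block *sb, unsigned long nr)
{
	/* returns 1 if a flush was queued, 0 if writeback was already in progress */
	return writeback_inodes_sb_nr_if_idle(sb, nr);
}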
+2
include/linux/writeback.h
··· 58 struct bdi_writeback; 59 int inode_wait(void *); 60 void writeback_inodes_sb(struct super_block *); 61 int writeback_inodes_sb_if_idle(struct super_block *); 62 void sync_inodes_sb(struct super_block *); 63 void writeback_inodes_wb(struct bdi_writeback *wb, 64 struct writeback_control *wbc);
··· 58 struct bdi_writeback; 59 int inode_wait(void *); 60 void writeback_inodes_sb(struct super_block *); 61 + void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); 62 int writeback_inodes_sb_if_idle(struct super_block *); 63 + int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); 64 void sync_inodes_sb(struct super_block *); 65 void writeback_inodes_wb(struct bdi_writeback *wb, 66 struct writeback_control *wbc);