Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (22 commits)
Btrfs: Fix async caching interaction with unmount
Btrfs: change how we unpin extents
Btrfs: Correct redundant test in add_inode_ref
Btrfs: find smallest available device extent during chunk allocation
Btrfs: clear all space_info->full after removing a block group
Btrfs: make flushoncommit mount option correctly wait on ordered_extents
Btrfs: Avoid delayed reference update looping
Btrfs: Fix ordering of key field checks in btrfs_previous_item
Btrfs: find_free_dev_extent doesn't handle holes at the start of the device
Btrfs: Remove code duplication in comp_keys
Btrfs: async block group caching
Btrfs: use hybrid extents+bitmap rb tree for free space
Btrfs: Fix crash on read failures at mount
Btrfs: remove of redundant btrfs_header_level
Btrfs: adjust NULL test
Btrfs: Remove broken sanity check from btrfs_rmap_block()
Btrfs: convert nested spin_lock_irqsave to spin_lock
Btrfs: make sure all dirty blocks are written at commit time
Btrfs: fix locking issue in btrfs_find_next_key
Btrfs: fix double increment of path->slots[0] in btrfs_next_leaf
...

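Most of the diff below implements asynchronous block group caching: a block group's free-space cache now moves through the states BTRFS_CACHE_NO -> BTRFS_CACHE_STARTED -> BTRFS_CACHE_FINISHED while a per-group kthread scans the extent tree, and allocators either proceed against a partially cached group or sleep on the group's caching_q wait queue. As a rough orientation aid only, here is a minimal userspace C model of that producer/consumer pattern; the pthread mutex/condvar and the made-up names stand in for the kernel's spinlock, wait queue and kthread, and none of this is btrfs code itself.

/*
 * Illustrative userspace model of the async caching state machine
 * (CACHE_NO -> CACHE_STARTED -> CACHE_FINISHED). Not btrfs code.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum cache_state { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct block_group {
    pthread_mutex_t lock;      /* stands in for cache->lock */
    pthread_cond_t caching_q;  /* stands in for the caching_q wait queue */
    enum cache_state cached;
    unsigned long free_space;  /* bytes discovered so far */
};

/* the "caching kthread": scan and publish free space incrementally */
static void *caching_thread(void *data)
{
    struct block_group *bg = data;

    for (int i = 0; i < 8; i++) {
        usleep(1000);                       /* pretend to scan extents */
        pthread_mutex_lock(&bg->lock);
        bg->free_space += 2 * 1024 * 1024;  /* found another chunk */
        pthread_cond_broadcast(&bg->caching_q); /* wake waiting allocators */
        pthread_mutex_unlock(&bg->lock);
    }
    pthread_mutex_lock(&bg->lock);
    bg->cached = CACHE_FINISHED;
    pthread_cond_broadcast(&bg->caching_q);
    pthread_mutex_unlock(&bg->lock);
    return NULL;
}

/* an "allocator": wait until enough space is cached or caching is done */
static unsigned long wait_for_space(struct block_group *bg, unsigned long want)
{
    pthread_mutex_lock(&bg->lock);
    while (bg->cached != CACHE_FINISHED && bg->free_space < want)
        pthread_cond_wait(&bg->caching_q, &bg->lock);
    unsigned long got = bg->free_space;
    pthread_mutex_unlock(&bg->lock);
    return got;
}

int main(void)
{
    struct block_group bg = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .caching_q = PTHREAD_COND_INITIALIZER,
        .cached = CACHE_STARTED,
    };
    pthread_t tid;

    pthread_create(&tid, NULL, caching_thread, &bg);
    printf("cached %lu bytes so far\n", wait_for_space(&bg, 4 * 1024 * 1024));
    pthread_join(&tid, NULL);
    return 0;
}

In the real patches the corresponding roles are played by cache_block_group(), caching_kthread() and wait_block_group_cache_progress() in fs/btrfs/extent-tree.c, with the state enum and caching_q wait queue added to struct btrfs_block_group_cache in fs/btrfs/ctree.h.
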
+1353 -462
+2 -2
fs/btrfs/async-thread.c
···
     * list
     */
    if (worker->idle) {
-       spin_lock_irqsave(&worker->workers->lock, flags);
+       spin_lock(&worker->workers->lock);
        worker->idle = 0;
        list_move_tail(&worker->worker_list,
                       &worker->workers->worker_list);
-       spin_unlock_irqrestore(&worker->workers->lock, flags);
+       spin_unlock(&worker->workers->lock);
    }
    if (!worker->working) {
        wake = 1;
+68 -53
fs/btrfs/ctree.c
···

    btrfs_disk_key_to_cpu(&k1, disk);

-   if (k1.objectid > k2->objectid)
-       return 1;
-   if (k1.objectid < k2->objectid)
-       return -1;
-   if (k1.type > k2->type)
-       return 1;
-   if (k1.type < k2->type)
-       return -1;
-   if (k1.offset > k2->offset)
-       return 1;
-   if (k1.offset < k2->offset)
-       return -1;
-   return 0;
+   return btrfs_comp_cpu_keys(&k1, k2);
 }

 /*
···
    }
    if (btrfs_header_nritems(mid) >
        BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
-       return 0;
-
-   if (btrfs_header_nritems(mid) > 2)
        return 0;

    if (btrfs_header_nritems(mid) < 2)
···
    struct extent_buffer *b;
    int slot;
    int ret;
+   int err;
    int level;
    int lowest_unlock = 1;
    u8 lowest_level = 0;
···
        p->locks[level] = 1;

        if (cow) {
-           int wret;
-
            /*
             * if we don't really need to cow this block
             * then we don't want to set the path blocking,
···

            btrfs_set_path_blocking(p);

-           wret = btrfs_cow_block(trans, root, b,
-                          p->nodes[level + 1],
-                          p->slots[level + 1], &b);
-           if (wret) {
+           err = btrfs_cow_block(trans, root, b,
+                         p->nodes[level + 1],
+                         p->slots[level + 1], &b);
+           if (err) {
                free_extent_buffer(b);
-               ret = wret;
+               ret = err;
                goto done;
            }
        }
···
        ret = bin_search(b, key, level, &slot);

        if (level != 0) {
-           if (ret && slot > 0)
+           int dec = 0;
+           if (ret && slot > 0) {
+               dec = 1;
                slot -= 1;
+           }
            p->slots[level] = slot;
-           ret = setup_nodes_for_search(trans, root, p, b, level,
+           err = setup_nodes_for_search(trans, root, p, b, level,
                         ins_len);
-           if (ret == -EAGAIN)
+           if (err == -EAGAIN)
                goto again;
-           else if (ret)
+           if (err) {
+               ret = err;
                goto done;
+           }
            b = p->nodes[level];
            slot = p->slots[level];

            unlock_up(p, level, lowest_unlock);

-           /* this is only true while dropping a snapshot */
            if (level == lowest_level) {
-               ret = 0;
+               if (dec)
+                   p->slots[level]++;
                goto done;
            }

-           ret = read_block_for_search(trans, root, p,
+           err = read_block_for_search(trans, root, p,
                            &b, level, slot, key);
-           if (ret == -EAGAIN)
+           if (err == -EAGAIN)
                goto again;
-
-           if (ret == -EIO)
+           if (err) {
+               ret = err;
                goto done;
+           }

            if (!p->skip_locking) {
-               int lret;
-
                btrfs_clear_path_blocking(p, NULL);
-               lret = btrfs_try_spin_lock(b);
+               err = btrfs_try_spin_lock(b);

-               if (!lret) {
+               if (!err) {
                    btrfs_set_path_blocking(p);
                    btrfs_tree_lock(b);
                    btrfs_clear_path_blocking(p, b);
···
            p->slots[level] = slot;
            if (ins_len > 0 &&
                btrfs_leaf_free_space(root, b) < ins_len) {
-               int sret;
-
                btrfs_set_path_blocking(p);
-               sret = split_leaf(trans, root, key,
-                         p, ins_len, ret == 0);
+               err = split_leaf(trans, root, key,
+                        p, ins_len, ret == 0);
                btrfs_clear_path_blocking(p, NULL);

-               BUG_ON(sret > 0);
-               if (sret) {
-                   ret = sret;
+               BUG_ON(err > 0);
+               if (err) {
+                   ret = err;
                    goto done;
                }
            }
···
    }

    /* delete the leaf if it is mostly empty */
-   if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
+   if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
        /* push_leaf_left fixes the path.
         * make sure the path still points to our leaf
         * for possible call to del_ptr below
···
  * calling this function.
  */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-           struct btrfs_key *key, int lowest_level,
+           struct btrfs_key *key, int level,
            int cache_only, u64 min_trans)
 {
-   int level = lowest_level;
    int slot;
    struct extent_buffer *c;

···
        c = path->nodes[level];
 next:
        if (slot >= btrfs_header_nritems(c)) {
-           level++;
-           if (level == BTRFS_MAX_LEVEL)
+           int ret;
+           int orig_lowest;
+           struct btrfs_key cur_key;
+           if (level + 1 >= BTRFS_MAX_LEVEL ||
+               !path->nodes[level + 1])
                return 1;
-           continue;
+
+           if (path->locks[level + 1]) {
+               level++;
+               continue;
+           }
+
+           slot = btrfs_header_nritems(c) - 1;
+           if (level == 0)
+               btrfs_item_key_to_cpu(c, &cur_key, slot);
+           else
+               btrfs_node_key_to_cpu(c, &cur_key, slot);
+
+           orig_lowest = path->lowest_level;
+           btrfs_release_path(root, path);
+           path->lowest_level = level;
+           ret = btrfs_search_slot(NULL, root, &cur_key, path,
+                       0, 0);
+           path->lowest_level = orig_lowest;
+           if (ret < 0)
+               return ret;
+
+           c = path->nodes[level];
+           slot = path->slots[level];
+           if (ret == 0)
+               slot++;
+           goto next;
        }
+
        if (level == 0)
            btrfs_item_key_to_cpu(c, key, slot);
        else {
···
     * advance the path if there are now more items available.
     */
    if (nritems > 0 && path->slots[0] < nritems - 1) {
-       path->slots[0]++;
+       if (ret == 0)
+           path->slots[0]++;
        ret = 0;
        goto done;
    }
···
        path->slots[0]--;

        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-       if (found_key.type == type)
-           return 0;
        if (found_key.objectid < min_objectid)
            break;
+       if (found_key.type == type)
+           return 0;
        if (found_key.objectid == min_objectid &&
            found_key.type < type)
            break;
+25 -4
fs/btrfs/ctree.h
···

 struct btrfs_extent_inline_ref {
    u8 type;
-   u64 offset;
+   __le64 offset;
 } __attribute__ ((__packed__));

 /* old style backrefs item */
···
    struct list_head block_groups;
    spinlock_t lock;
    struct rw_semaphore groups_sem;
+   atomic_t caching_threads;
 };

 /*
···
    /* first extent starting offset */
    u64 window_start;

+   /* if this cluster simply points at a bitmap in the block group */
+   bool points_to_bitmap;
+
    struct btrfs_block_group_cache *block_group;
    /*
     * when a cluster is allocated from a block group, we put the
···
    struct list_head block_group_list;
 };

+enum btrfs_caching_type {
+   BTRFS_CACHE_NO = 0,
+   BTRFS_CACHE_STARTED = 1,
+   BTRFS_CACHE_FINISHED = 2,
+};
+
 struct btrfs_block_group_cache {
    struct btrfs_key key;
    struct btrfs_block_group_item item;
+   struct btrfs_fs_info *fs_info;
    spinlock_t lock;
-   struct mutex cache_mutex;
    u64 pinned;
    u64 reserved;
    u64 flags;
-   int cached;
+   u64 sectorsize;
+   int extents_thresh;
+   int free_extents;
+   int total_bitmaps;
    int ro;
    int dirty;
+
+   /* cache tracking stuff */
+   wait_queue_head_t caching_q;
+   int cached;

    struct btrfs_space_info *space_info;

    /* free space cache stuff */
    spinlock_t tree_lock;
-   struct rb_root free_space_bytes;
    struct rb_root free_space_offset;
+   u64 free_space;

    /* block group cache stuff */
    struct rb_node cache_node;
···

    /* the node lock is held while changing the node pointer */
    spinlock_t node_lock;
+
+   /* taken when updating the commit root */
+   struct rw_semaphore commit_root_sem;

    struct extent_buffer *commit_root;
    struct btrfs_root *log_root;
···
                 u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
                   u64 bytes);
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
             int level, int *slot);
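The extents_thresh, free_extents and total_bitmaps fields added above support the new hybrid extent+bitmap free space cache: later hunks in this merge budget roughly 32KB of memory per block group for free-space tracking and start folding free space into page-sized bitmaps once plain extent entries would consume about half of that budget. A tiny standalone calculation of that threshold, using a stand-in struct because the real sizeof(struct btrfs_free_space) depends on the kernel configuration:

#include <stdio.h>

/* stand-in for the kernel's struct btrfs_free_space; the real size
 * varies with the kernel config, so this is only illustrative */
struct free_space_entry {
    void *rb_left, *rb_right, *rb_parent;
    unsigned long long offset;
    unsigned long long bytes;
    void *bitmap;
};

int main(void)
{
    const unsigned long max_cache_bytes_per_bg = 32 * 1024; /* 32KB budget */
    unsigned long thresh = (max_cache_bytes_per_bg / 2) /
                           sizeof(struct free_space_entry);

    /* roughly how many discrete free extents are tracked before the
     * cache starts converting runs of space into bitmaps instead */
    printf("extents_thresh ~= %lu entries\n", thresh);
    return 0;
}
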
+15
fs/btrfs/disk-io.c
···
    spin_lock_init(&root->inode_lock);
    mutex_init(&root->objectid_mutex);
    mutex_init(&root->log_mutex);
+   init_rwsem(&root->commit_root_sem);
    init_waitqueue_head(&root->log_writer_wait);
    init_waitqueue_head(&root->log_commit_wait[0]);
    init_waitqueue_head(&root->log_commit_wait[1]);
···
                       btrfs_super_chunk_root(disk_super),
                       blocksize, generation);
    BUG_ON(!chunk_root->node);
+   if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+       printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+              sb->s_id);
+       goto fail_chunk_root;
+   }
    btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
    chunk_root->commit_root = btrfs_root_node(chunk_root);

···
                      blocksize, generation);
    if (!tree_root->node)
        goto fail_chunk_root;
+   if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+       printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+              sb->s_id);
+       goto fail_tree_root;
+   }
    btrfs_set_root_node(&tree_root->root_item, tree_root->node);
    tree_root->commit_root = btrfs_root_node(tree_root);

···
            printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
    }

+   fs_info->closing = 2;
+   smp_mb();
+
    if (fs_info->delalloc_bytes) {
        printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
               (unsigned long long)fs_info->delalloc_bytes);
···
    free_extent_buffer(root->fs_info->csum_root->commit_root);

    btrfs_free_block_groups(root->fs_info);
+   btrfs_free_pinned_extents(root->fs_info);

    del_fs_roots(fs_info);

+391 -129
fs/btrfs/extent-tree.c
··· 21 21 #include <linux/blkdev.h> 22 22 #include <linux/sort.h> 23 23 #include <linux/rcupdate.h> 24 + #include <linux/kthread.h> 24 25 #include "compat.h" 25 26 #include "hash.h" 26 27 #include "ctree.h" ··· 61 60 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 62 61 struct btrfs_root *extent_root, u64 alloc_bytes, 63 62 u64 flags, int force); 63 + 64 + static noinline int 65 + block_group_cache_done(struct btrfs_block_group_cache *cache) 66 + { 67 + smp_mb(); 68 + return cache->cached == BTRFS_CACHE_FINISHED; 69 + } 64 70 65 71 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 66 72 { ··· 154 146 } 155 147 156 148 /* 149 + * We always set EXTENT_LOCKED for the super mirror extents so we don't 150 + * overwrite them, so those bits need to be unset. Also, if we are unmounting 151 + * with pinned extents still sitting there because we had a block group caching, 152 + * we need to clear those now, since we are done. 153 + */ 154 + void btrfs_free_pinned_extents(struct btrfs_fs_info *info) 155 + { 156 + u64 start, end, last = 0; 157 + int ret; 158 + 159 + while (1) { 160 + ret = find_first_extent_bit(&info->pinned_extents, last, 161 + &start, &end, 162 + EXTENT_LOCKED|EXTENT_DIRTY); 163 + if (ret) 164 + break; 165 + 166 + clear_extent_bits(&info->pinned_extents, start, end, 167 + EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); 168 + last = end+1; 169 + } 170 + } 171 + 172 + static int remove_sb_from_cache(struct btrfs_root *root, 173 + struct btrfs_block_group_cache *cache) 174 + { 175 + struct btrfs_fs_info *fs_info = root->fs_info; 176 + u64 bytenr; 177 + u64 *logical; 178 + int stripe_len; 179 + int i, nr, ret; 180 + 181 + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 182 + bytenr = btrfs_sb_offset(i); 183 + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 184 + cache->key.objectid, bytenr, 185 + 0, &logical, &nr, &stripe_len); 186 + BUG_ON(ret); 187 + while (nr--) { 188 + try_lock_extent(&fs_info->pinned_extents, 189 + logical[nr], 190 + logical[nr] + stripe_len - 1, GFP_NOFS); 191 + } 192 + kfree(logical); 193 + } 194 + 195 + return 0; 196 + } 197 + 198 + /* 157 199 * this is only called by cache_block_group, since we could have freed extents 158 200 * we need to check the pinned_extents for any extents that can't be used yet 159 201 * since their free space will be released as soon as the transaction commits. 
160 202 */ 161 - static int add_new_free_space(struct btrfs_block_group_cache *block_group, 203 + static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 162 204 struct btrfs_fs_info *info, u64 start, u64 end) 163 205 { 164 - u64 extent_start, extent_end, size; 206 + u64 extent_start, extent_end, size, total_added = 0; 165 207 int ret; 166 208 167 209 while (start < end) { 168 210 ret = find_first_extent_bit(&info->pinned_extents, start, 169 211 &extent_start, &extent_end, 170 - EXTENT_DIRTY); 212 + EXTENT_DIRTY|EXTENT_LOCKED); 171 213 if (ret) 172 214 break; 173 215 ··· 225 167 start = extent_end + 1; 226 168 } else if (extent_start > start && extent_start < end) { 227 169 size = extent_start - start; 170 + total_added += size; 228 171 ret = btrfs_add_free_space(block_group, start, 229 172 size); 230 173 BUG_ON(ret); ··· 237 178 238 179 if (start < end) { 239 180 size = end - start; 181 + total_added += size; 240 182 ret = btrfs_add_free_space(block_group, start, size); 241 183 BUG_ON(ret); 242 184 } 243 185 244 - return 0; 186 + return total_added; 245 187 } 246 188 247 - static int remove_sb_from_cache(struct btrfs_root *root, 248 - struct btrfs_block_group_cache *cache) 189 + static int caching_kthread(void *data) 249 190 { 250 - u64 bytenr; 251 - u64 *logical; 252 - int stripe_len; 253 - int i, nr, ret; 254 - 255 - for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 256 - bytenr = btrfs_sb_offset(i); 257 - ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 258 - cache->key.objectid, bytenr, 0, 259 - &logical, &nr, &stripe_len); 260 - BUG_ON(ret); 261 - while (nr--) { 262 - btrfs_remove_free_space(cache, logical[nr], 263 - stripe_len); 264 - } 265 - kfree(logical); 266 - } 267 - return 0; 268 - } 269 - 270 - static int cache_block_group(struct btrfs_root *root, 271 - struct btrfs_block_group_cache *block_group) 272 - { 191 + struct btrfs_block_group_cache *block_group = data; 192 + struct btrfs_fs_info *fs_info = block_group->fs_info; 193 + u64 last = 0; 273 194 struct btrfs_path *path; 274 195 int ret = 0; 275 196 struct btrfs_key key; 276 197 struct extent_buffer *leaf; 277 198 int slot; 278 - u64 last; 199 + u64 total_found = 0; 279 200 280 - if (!block_group) 281 - return 0; 282 - 283 - root = root->fs_info->extent_root; 284 - 285 - if (block_group->cached) 286 - return 0; 201 + BUG_ON(!fs_info); 287 202 288 203 path = btrfs_alloc_path(); 289 204 if (!path) 290 205 return -ENOMEM; 291 206 292 - path->reada = 2; 207 + atomic_inc(&block_group->space_info->caching_threads); 208 + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 209 + again: 210 + /* need to make sure the commit_root doesn't disappear */ 211 + down_read(&fs_info->extent_root->commit_root_sem); 212 + 293 213 /* 294 - * we get into deadlocks with paths held by callers of this function. 295 - * since the alloc_mutex is protecting things right now, just 296 - * skip the locking here 214 + * We don't want to deadlock with somebody trying to allocate a new 215 + * extent for the extent root while also trying to search the extent 216 + * root to add free space. 
So we skip locking and search the commit 217 + * root, since its read-only 297 218 */ 298 219 path->skip_locking = 1; 299 - last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 220 + path->search_commit_root = 1; 221 + path->reada = 2; 222 + 300 223 key.objectid = last; 301 224 key.offset = 0; 302 225 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 303 - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 226 + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); 304 227 if (ret < 0) 305 228 goto err; 306 229 307 230 while (1) { 231 + smp_mb(); 232 + if (block_group->fs_info->closing > 1) { 233 + last = (u64)-1; 234 + break; 235 + } 236 + 308 237 leaf = path->nodes[0]; 309 238 slot = path->slots[0]; 310 239 if (slot >= btrfs_header_nritems(leaf)) { 311 - ret = btrfs_next_leaf(root, path); 240 + ret = btrfs_next_leaf(fs_info->extent_root, path); 312 241 if (ret < 0) 313 242 goto err; 314 - if (ret == 0) 315 - continue; 316 - else 243 + else if (ret) 317 244 break; 245 + 246 + if (need_resched()) { 247 + btrfs_release_path(fs_info->extent_root, path); 248 + up_read(&fs_info->extent_root->commit_root_sem); 249 + cond_resched(); 250 + goto again; 251 + } 252 + 253 + continue; 318 254 } 319 255 btrfs_item_key_to_cpu(leaf, &key, slot); 320 256 if (key.objectid < block_group->key.objectid) ··· 320 266 break; 321 267 322 268 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 323 - add_new_free_space(block_group, root->fs_info, last, 324 - key.objectid); 325 - 269 + total_found += add_new_free_space(block_group, 270 + fs_info, last, 271 + key.objectid); 326 272 last = key.objectid + key.offset; 273 + } 274 + 275 + if (total_found > (1024 * 1024 * 2)) { 276 + total_found = 0; 277 + wake_up(&block_group->caching_q); 327 278 } 328 279 next: 329 280 path->slots[0]++; 330 281 } 331 - 332 - add_new_free_space(block_group, root->fs_info, last, 333 - block_group->key.objectid + 334 - block_group->key.offset); 335 - 336 - block_group->cached = 1; 337 - remove_sb_from_cache(root, block_group); 338 282 ret = 0; 283 + 284 + total_found += add_new_free_space(block_group, fs_info, last, 285 + block_group->key.objectid + 286 + block_group->key.offset); 287 + 288 + spin_lock(&block_group->lock); 289 + block_group->cached = BTRFS_CACHE_FINISHED; 290 + spin_unlock(&block_group->lock); 291 + 339 292 err: 340 293 btrfs_free_path(path); 294 + up_read(&fs_info->extent_root->commit_root_sem); 295 + atomic_dec(&block_group->space_info->caching_threads); 296 + wake_up(&block_group->caching_q); 297 + 298 + return 0; 299 + } 300 + 301 + static int cache_block_group(struct btrfs_block_group_cache *cache) 302 + { 303 + struct task_struct *tsk; 304 + int ret = 0; 305 + 306 + spin_lock(&cache->lock); 307 + if (cache->cached != BTRFS_CACHE_NO) { 308 + spin_unlock(&cache->lock); 309 + return ret; 310 + } 311 + cache->cached = BTRFS_CACHE_STARTED; 312 + spin_unlock(&cache->lock); 313 + 314 + tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 315 + cache->key.objectid); 316 + if (IS_ERR(tsk)) { 317 + ret = PTR_ERR(tsk); 318 + printk(KERN_ERR "error running thread %d\n", ret); 319 + BUG(); 320 + } 321 + 341 322 return ret; 342 323 } 343 324 ··· 2476 2387 2477 2388 } 2478 2389 2390 + static struct btrfs_block_group_cache * 2391 + next_block_group(struct btrfs_root *root, 2392 + struct btrfs_block_group_cache *cache) 2393 + { 2394 + struct rb_node *node; 2395 + spin_lock(&root->fs_info->block_group_cache_lock); 2396 + node = rb_next(&cache->cache_node); 2397 + 
btrfs_put_block_group(cache); 2398 + if (node) { 2399 + cache = rb_entry(node, struct btrfs_block_group_cache, 2400 + cache_node); 2401 + atomic_inc(&cache->count); 2402 + } else 2403 + cache = NULL; 2404 + spin_unlock(&root->fs_info->block_group_cache_lock); 2405 + return cache; 2406 + } 2407 + 2479 2408 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2480 2409 struct btrfs_root *root) 2481 2410 { 2482 - struct btrfs_block_group_cache *cache, *entry; 2483 - struct rb_node *n; 2411 + struct btrfs_block_group_cache *cache; 2484 2412 int err = 0; 2485 - int werr = 0; 2486 2413 struct btrfs_path *path; 2487 2414 u64 last = 0; 2488 2415 ··· 2507 2402 return -ENOMEM; 2508 2403 2509 2404 while (1) { 2510 - cache = NULL; 2511 - spin_lock(&root->fs_info->block_group_cache_lock); 2512 - for (n = rb_first(&root->fs_info->block_group_cache_tree); 2513 - n; n = rb_next(n)) { 2514 - entry = rb_entry(n, struct btrfs_block_group_cache, 2515 - cache_node); 2516 - if (entry->dirty) { 2517 - cache = entry; 2518 - break; 2519 - } 2405 + if (last == 0) { 2406 + err = btrfs_run_delayed_refs(trans, root, 2407 + (unsigned long)-1); 2408 + BUG_ON(err); 2520 2409 } 2521 - spin_unlock(&root->fs_info->block_group_cache_lock); 2522 2410 2523 - if (!cache) 2524 - break; 2525 - 2526 - cache->dirty = 0; 2527 - last += cache->key.offset; 2528 - 2529 - err = write_one_cache_group(trans, root, 2530 - path, cache); 2531 - /* 2532 - * if we fail to write the cache group, we want 2533 - * to keep it marked dirty in hopes that a later 2534 - * write will work 2535 - */ 2536 - if (err) { 2537 - werr = err; 2411 + cache = btrfs_lookup_first_block_group(root->fs_info, last); 2412 + while (cache) { 2413 + if (cache->dirty) 2414 + break; 2415 + cache = next_block_group(root, cache); 2416 + } 2417 + if (!cache) { 2418 + if (last == 0) 2419 + break; 2420 + last = 0; 2538 2421 continue; 2539 2422 } 2423 + 2424 + cache->dirty = 0; 2425 + last = cache->key.objectid + cache->key.offset; 2426 + 2427 + err = write_one_cache_group(trans, root, path, cache); 2428 + BUG_ON(err); 2429 + btrfs_put_block_group(cache); 2540 2430 } 2431 + 2541 2432 btrfs_free_path(path); 2542 - return werr; 2433 + return 0; 2543 2434 } 2544 2435 2545 2436 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) ··· 2585 2484 found->force_alloc = 0; 2586 2485 *space_info = found; 2587 2486 list_add_rcu(&found->list, &info->space_info); 2487 + atomic_set(&found->caching_threads, 0); 2588 2488 return 0; 2589 2489 } 2590 2490 ··· 3049 2947 struct btrfs_block_group_cache *cache; 3050 2948 struct btrfs_fs_info *fs_info = root->fs_info; 3051 2949 3052 - if (pin) { 2950 + if (pin) 3053 2951 set_extent_dirty(&fs_info->pinned_extents, 3054 2952 bytenr, bytenr + num - 1, GFP_NOFS); 3055 - } else { 3056 - clear_extent_dirty(&fs_info->pinned_extents, 3057 - bytenr, bytenr + num - 1, GFP_NOFS); 3058 - } 3059 2953 3060 2954 while (num > 0) { 3061 2955 cache = btrfs_lookup_block_group(fs_info, bytenr); ··· 3067 2969 spin_unlock(&cache->space_info->lock); 3068 2970 fs_info->total_pinned += len; 3069 2971 } else { 2972 + int unpin = 0; 2973 + 2974 + /* 2975 + * in order to not race with the block group caching, we 2976 + * only want to unpin the extent if we are cached. If 2977 + * we aren't cached, we want to start async caching this 2978 + * block group so we can free the extent the next time 2979 + * around. 
2980 + */ 3070 2981 spin_lock(&cache->space_info->lock); 3071 2982 spin_lock(&cache->lock); 3072 - cache->pinned -= len; 3073 - cache->space_info->bytes_pinned -= len; 2983 + unpin = (cache->cached == BTRFS_CACHE_FINISHED); 2984 + if (likely(unpin)) { 2985 + cache->pinned -= len; 2986 + cache->space_info->bytes_pinned -= len; 2987 + fs_info->total_pinned -= len; 2988 + } 3074 2989 spin_unlock(&cache->lock); 3075 2990 spin_unlock(&cache->space_info->lock); 3076 - fs_info->total_pinned -= len; 3077 - if (cache->cached) 2991 + 2992 + if (likely(unpin)) 2993 + clear_extent_dirty(&fs_info->pinned_extents, 2994 + bytenr, bytenr + len -1, 2995 + GFP_NOFS); 2996 + else 2997 + cache_block_group(cache); 2998 + 2999 + if (unpin) 3078 3000 btrfs_add_free_space(cache, bytenr, len); 3079 3001 } 3080 3002 btrfs_put_block_group(cache); ··· 3148 3030 &start, &end, EXTENT_DIRTY); 3149 3031 if (ret) 3150 3032 break; 3033 + 3151 3034 set_extent_dirty(copy, start, end, GFP_NOFS); 3152 3035 last = end + 1; 3153 3036 } ··· 3177 3058 3178 3059 cond_resched(); 3179 3060 } 3061 + 3180 3062 return ret; 3181 3063 } 3182 3064 ··· 3556 3436 } 3557 3437 3558 3438 /* 3439 + * when we wait for progress in the block group caching, its because 3440 + * our allocation attempt failed at least once. So, we must sleep 3441 + * and let some progress happen before we try again. 3442 + * 3443 + * This function will sleep at least once waiting for new free space to 3444 + * show up, and then it will check the block group free space numbers 3445 + * for our min num_bytes. Another option is to have it go ahead 3446 + * and look in the rbtree for a free extent of a given size, but this 3447 + * is a good start. 3448 + */ 3449 + static noinline int 3450 + wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3451 + u64 num_bytes) 3452 + { 3453 + DEFINE_WAIT(wait); 3454 + 3455 + prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3456 + 3457 + if (block_group_cache_done(cache)) { 3458 + finish_wait(&cache->caching_q, &wait); 3459 + return 0; 3460 + } 3461 + schedule(); 3462 + finish_wait(&cache->caching_q, &wait); 3463 + 3464 + wait_event(cache->caching_q, block_group_cache_done(cache) || 3465 + (cache->free_space >= num_bytes)); 3466 + return 0; 3467 + } 3468 + 3469 + enum btrfs_loop_type { 3470 + LOOP_CACHED_ONLY = 0, 3471 + LOOP_CACHING_NOWAIT = 1, 3472 + LOOP_CACHING_WAIT = 2, 3473 + LOOP_ALLOC_CHUNK = 3, 3474 + LOOP_NO_EMPTY_SIZE = 4, 3475 + }; 3476 + 3477 + /* 3559 3478 * walks the btree of allocated extents and find a hole of a given size. 3560 3479 * The key ins is changed to record the hole: 3561 3480 * ins->objectid == block start ··· 3619 3460 struct btrfs_space_info *space_info; 3620 3461 int last_ptr_loop = 0; 3621 3462 int loop = 0; 3463 + bool found_uncached_bg = false; 3622 3464 3623 3465 WARN_ON(num_bytes < root->sectorsize); 3624 3466 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); ··· 3651 3491 search_start = max(search_start, first_logical_byte(root, 0)); 3652 3492 search_start = max(search_start, hint_byte); 3653 3493 3654 - if (!last_ptr) { 3494 + if (!last_ptr) 3655 3495 empty_cluster = 0; 3656 - loop = 1; 3657 - } 3658 3496 3659 3497 if (search_start == hint_byte) { 3660 3498 block_group = btrfs_lookup_block_group(root->fs_info, 3661 3499 search_start); 3662 - if (block_group && block_group_bits(block_group, data)) { 3500 + /* 3501 + * we don't want to use the block group if it doesn't match our 3502 + * allocation bits, or if its not cached. 
3503 + */ 3504 + if (block_group && block_group_bits(block_group, data) && 3505 + block_group_cache_done(block_group)) { 3663 3506 down_read(&space_info->groups_sem); 3664 3507 if (list_empty(&block_group->list) || 3665 3508 block_group->ro) { ··· 3685 3522 down_read(&space_info->groups_sem); 3686 3523 list_for_each_entry(block_group, &space_info->block_groups, list) { 3687 3524 u64 offset; 3525 + int cached; 3688 3526 3689 3527 atomic_inc(&block_group->count); 3690 3528 search_start = block_group->key.objectid; 3691 3529 3692 3530 have_block_group: 3693 - if (unlikely(!block_group->cached)) { 3694 - mutex_lock(&block_group->cache_mutex); 3695 - ret = cache_block_group(root, block_group); 3696 - mutex_unlock(&block_group->cache_mutex); 3697 - if (ret) { 3698 - btrfs_put_block_group(block_group); 3699 - break; 3531 + if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 3532 + /* 3533 + * we want to start caching kthreads, but not too many 3534 + * right off the bat so we don't overwhelm the system, 3535 + * so only start them if there are less than 2 and we're 3536 + * in the initial allocation phase. 3537 + */ 3538 + if (loop > LOOP_CACHING_NOWAIT || 3539 + atomic_read(&space_info->caching_threads) < 2) { 3540 + ret = cache_block_group(block_group); 3541 + BUG_ON(ret); 3700 3542 } 3543 + } 3544 + 3545 + cached = block_group_cache_done(block_group); 3546 + if (unlikely(!cached)) { 3547 + found_uncached_bg = true; 3548 + 3549 + /* if we only want cached bgs, loop */ 3550 + if (loop == LOOP_CACHED_ONLY) 3551 + goto loop; 3701 3552 } 3702 3553 3703 3554 if (unlikely(block_group->ro)) ··· 3792 3615 spin_unlock(&last_ptr->refill_lock); 3793 3616 goto checks; 3794 3617 } 3618 + } else if (!cached && loop > LOOP_CACHING_NOWAIT) { 3619 + spin_unlock(&last_ptr->refill_lock); 3620 + 3621 + wait_block_group_cache_progress(block_group, 3622 + num_bytes + empty_cluster + empty_size); 3623 + goto have_block_group; 3795 3624 } 3625 + 3796 3626 /* 3797 3627 * at this point we either didn't find a cluster 3798 3628 * or we weren't able to allocate a block from our 3799 3629 * cluster. 
Free the cluster we've been trying 3800 3630 * to use, and go to the next block group 3801 3631 */ 3802 - if (loop < 2) { 3632 + if (loop < LOOP_NO_EMPTY_SIZE) { 3803 3633 btrfs_return_cluster_to_free_space(NULL, 3804 3634 last_ptr); 3805 3635 spin_unlock(&last_ptr->refill_lock); ··· 3817 3633 3818 3634 offset = btrfs_find_space_for_alloc(block_group, search_start, 3819 3635 num_bytes, empty_size); 3820 - if (!offset) 3636 + if (!offset && (cached || (!cached && 3637 + loop == LOOP_CACHING_NOWAIT))) { 3821 3638 goto loop; 3639 + } else if (!offset && (!cached && 3640 + loop > LOOP_CACHING_NOWAIT)) { 3641 + wait_block_group_cache_progress(block_group, 3642 + num_bytes + empty_size); 3643 + goto have_block_group; 3644 + } 3822 3645 checks: 3823 3646 search_start = stripe_align(root, offset); 3824 - 3825 3647 /* move on to the next group */ 3826 3648 if (search_start + num_bytes >= search_end) { 3827 3649 btrfs_add_free_space(block_group, offset, num_bytes); ··· 3873 3683 } 3874 3684 up_read(&space_info->groups_sem); 3875 3685 3876 - /* loop == 0, try to find a clustered alloc in every block group 3877 - * loop == 1, try again after forcing a chunk allocation 3878 - * loop == 2, set empty_size and empty_cluster to 0 and try again 3686 + /* LOOP_CACHED_ONLY, only search fully cached block groups 3687 + * LOOP_CACHING_NOWAIT, search partially cached block groups, but 3688 + * dont wait foR them to finish caching 3689 + * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 3690 + * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 3691 + * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 3692 + * again 3879 3693 */ 3880 - if (!ins->objectid && loop < 3 && 3881 - (empty_size || empty_cluster || allowed_chunk_alloc)) { 3882 - if (loop >= 2) { 3694 + if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 3695 + (found_uncached_bg || empty_size || empty_cluster || 3696 + allowed_chunk_alloc)) { 3697 + if (found_uncached_bg) { 3698 + found_uncached_bg = false; 3699 + if (loop < LOOP_CACHING_WAIT) { 3700 + loop++; 3701 + goto search; 3702 + } 3703 + } 3704 + 3705 + if (loop == LOOP_ALLOC_CHUNK) { 3883 3706 empty_size = 0; 3884 3707 empty_cluster = 0; 3885 3708 } ··· 3905 3702 space_info->force_alloc = 1; 3906 3703 } 3907 3704 3908 - if (loop < 3) { 3705 + if (loop < LOOP_NO_EMPTY_SIZE) { 3909 3706 loop++; 3910 3707 goto search; 3911 3708 } ··· 4001 3798 num_bytes, data, 1); 4002 3799 goto again; 4003 3800 } 4004 - if (ret) { 3801 + if (ret == -ENOSPC) { 4005 3802 struct btrfs_space_info *sinfo; 4006 3803 4007 3804 sinfo = __find_space_info(root->fs_info, data); ··· 4009 3806 "wanted %llu\n", (unsigned long long)data, 4010 3807 (unsigned long long)num_bytes); 4011 3808 dump_space_info(sinfo, num_bytes); 4012 - BUG(); 4013 3809 } 4014 3810 4015 3811 return ret; ··· 4046 3844 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 4047 3845 empty_size, hint_byte, search_end, ins, 4048 3846 data); 4049 - update_reserved_extents(root, ins->objectid, ins->offset, 1); 3847 + if (!ret) 3848 + update_reserved_extents(root, ins->objectid, ins->offset, 1); 3849 + 4050 3850 return ret; 4051 3851 } 4052 3852 ··· 4210 4006 struct btrfs_block_group_cache *block_group; 4211 4007 4212 4008 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4213 - mutex_lock(&block_group->cache_mutex); 4214 - cache_block_group(root, block_group); 4215 - mutex_unlock(&block_group->cache_mutex); 4009 + cache_block_group(block_group); 4010 + 
wait_event(block_group->caching_q, 4011 + block_group_cache_done(block_group)); 4216 4012 4217 4013 ret = btrfs_remove_free_space(block_group, ins->objectid, 4218 4014 ins->offset); ··· 4243 4039 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4244 4040 empty_size, hint_byte, search_end, 4245 4041 ins, 0); 4246 - BUG_ON(ret); 4042 + if (ret) 4043 + return ret; 4247 4044 4248 4045 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4249 4046 if (parent == 0) ··· 7160 6955 &info->block_group_cache_tree); 7161 6956 spin_unlock(&info->block_group_cache_lock); 7162 6957 7163 - btrfs_remove_free_space_cache(block_group); 7164 6958 down_write(&block_group->space_info->groups_sem); 7165 6959 list_del(&block_group->list); 7166 6960 up_write(&block_group->space_info->groups_sem); 6961 + 6962 + if (block_group->cached == BTRFS_CACHE_STARTED) 6963 + wait_event(block_group->caching_q, 6964 + block_group_cache_done(block_group)); 6965 + 6966 + btrfs_remove_free_space_cache(block_group); 7167 6967 7168 6968 WARN_ON(atomic_read(&block_group->count) != 1); 7169 6969 kfree(block_group); ··· 7235 7025 atomic_set(&cache->count, 1); 7236 7026 spin_lock_init(&cache->lock); 7237 7027 spin_lock_init(&cache->tree_lock); 7238 - mutex_init(&cache->cache_mutex); 7028 + cache->fs_info = info; 7029 + init_waitqueue_head(&cache->caching_q); 7239 7030 INIT_LIST_HEAD(&cache->list); 7240 7031 INIT_LIST_HEAD(&cache->cluster_list); 7032 + 7033 + /* 7034 + * we only want to have 32k of ram per block group for keeping 7035 + * track of free space, and if we pass 1/2 of that we want to 7036 + * start converting things over to using bitmaps 7037 + */ 7038 + cache->extents_thresh = ((1024 * 32) / 2) / 7039 + sizeof(struct btrfs_free_space); 7040 + 7241 7041 read_extent_buffer(leaf, &cache->item, 7242 7042 btrfs_item_ptr_offset(leaf, path->slots[0]), 7243 7043 sizeof(cache->item)); ··· 7256 7036 key.objectid = found_key.objectid + found_key.offset; 7257 7037 btrfs_release_path(root, path); 7258 7038 cache->flags = btrfs_block_group_flags(&cache->item); 7039 + cache->sectorsize = root->sectorsize; 7040 + 7041 + remove_sb_from_cache(root, cache); 7042 + 7043 + /* 7044 + * check for two cases, either we are full, and therefore 7045 + * don't need to bother with the caching work since we won't 7046 + * find any space, or we are empty, and we can just add all 7047 + * the space in and be done with it. This saves us _alot_ of 7048 + * time, particularly in the full case. 
7049 + */ 7050 + if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7051 + cache->cached = BTRFS_CACHE_FINISHED; 7052 + } else if (btrfs_block_group_used(&cache->item) == 0) { 7053 + cache->cached = BTRFS_CACHE_FINISHED; 7054 + add_new_free_space(cache, root->fs_info, 7055 + found_key.objectid, 7056 + found_key.objectid + 7057 + found_key.offset); 7058 + } 7259 7059 7260 7060 ret = update_space_info(info, cache->flags, found_key.offset, 7261 7061 btrfs_block_group_used(&cache->item), ··· 7319 7079 cache->key.objectid = chunk_offset; 7320 7080 cache->key.offset = size; 7321 7081 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7082 + cache->sectorsize = root->sectorsize; 7083 + 7084 + /* 7085 + * we only want to have 32k of ram per block group for keeping track 7086 + * of free space, and if we pass 1/2 of that we want to start 7087 + * converting things over to using bitmaps 7088 + */ 7089 + cache->extents_thresh = ((1024 * 32) / 2) / 7090 + sizeof(struct btrfs_free_space); 7322 7091 atomic_set(&cache->count, 1); 7323 7092 spin_lock_init(&cache->lock); 7324 7093 spin_lock_init(&cache->tree_lock); 7325 - mutex_init(&cache->cache_mutex); 7094 + init_waitqueue_head(&cache->caching_q); 7326 7095 INIT_LIST_HEAD(&cache->list); 7327 7096 INIT_LIST_HEAD(&cache->cluster_list); 7328 7097 ··· 7339 7090 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7340 7091 cache->flags = type; 7341 7092 btrfs_set_block_group_flags(&cache->item, type); 7093 + 7094 + cache->cached = BTRFS_CACHE_FINISHED; 7095 + remove_sb_from_cache(root, cache); 7096 + 7097 + add_new_free_space(cache, root->fs_info, chunk_offset, 7098 + chunk_offset + size); 7342 7099 7343 7100 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7344 7101 &cache->space_info); ··· 7404 7149 rb_erase(&block_group->cache_node, 7405 7150 &root->fs_info->block_group_cache_tree); 7406 7151 spin_unlock(&root->fs_info->block_group_cache_lock); 7407 - btrfs_remove_free_space_cache(block_group); 7152 + 7408 7153 down_write(&block_group->space_info->groups_sem); 7409 7154 /* 7410 7155 * we must use list_del_init so people can check to see if they ··· 7413 7158 list_del_init(&block_group->list); 7414 7159 up_write(&block_group->space_info->groups_sem); 7415 7160 7161 + if (block_group->cached == BTRFS_CACHE_STARTED) 7162 + wait_event(block_group->caching_q, 7163 + block_group_cache_done(block_group)); 7164 + 7165 + btrfs_remove_free_space_cache(block_group); 7166 + 7416 7167 spin_lock(&block_group->space_info->lock); 7417 7168 block_group->space_info->total_bytes -= block_group->key.offset; 7418 7169 block_group->space_info->bytes_readonly -= block_group->key.offset; 7419 7170 spin_unlock(&block_group->space_info->lock); 7420 - block_group->space_info->full = 0; 7171 + 7172 + btrfs_clear_space_info_full(root->fs_info); 7421 7173 7422 7174 btrfs_put_block_group(block_group); 7423 7175 btrfs_put_block_group(block_group);
+798 -221
fs/btrfs/free-space-cache.c
··· 16 16 * Boston, MA 021110-1307, USA. 17 17 */ 18 18 19 + #include <linux/pagemap.h> 19 20 #include <linux/sched.h> 21 + #include <linux/math64.h> 20 22 #include "ctree.h" 21 23 #include "free-space-cache.h" 22 24 #include "transaction.h" 23 25 24 - struct btrfs_free_space { 25 - struct rb_node bytes_index; 26 - struct rb_node offset_index; 27 - u64 offset; 28 - u64 bytes; 29 - }; 26 + #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 27 + #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 28 + 29 + static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 30 + u64 offset) 31 + { 32 + BUG_ON(offset < bitmap_start); 33 + offset -= bitmap_start; 34 + return (unsigned long)(div64_u64(offset, sectorsize)); 35 + } 36 + 37 + static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) 38 + { 39 + return (unsigned long)(div64_u64(bytes, sectorsize)); 40 + } 41 + 42 + static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, 43 + u64 offset) 44 + { 45 + u64 bitmap_start; 46 + u64 bytes_per_bitmap; 47 + 48 + bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; 49 + bitmap_start = offset - block_group->key.objectid; 50 + bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 51 + bitmap_start *= bytes_per_bitmap; 52 + bitmap_start += block_group->key.objectid; 53 + 54 + return bitmap_start; 55 + } 30 56 31 57 static int tree_insert_offset(struct rb_root *root, u64 offset, 32 - struct rb_node *node) 58 + struct rb_node *node, int bitmap) 33 59 { 34 60 struct rb_node **p = &root->rb_node; 35 61 struct rb_node *parent = NULL; ··· 65 39 parent = *p; 66 40 info = rb_entry(parent, struct btrfs_free_space, offset_index); 67 41 68 - if (offset < info->offset) 42 + if (offset < info->offset) { 69 43 p = &(*p)->rb_left; 70 - else if (offset > info->offset) 44 + } else if (offset > info->offset) { 71 45 p = &(*p)->rb_right; 72 - else 73 - return -EEXIST; 74 - } 75 - 76 - rb_link_node(node, parent, p); 77 - rb_insert_color(node, root); 78 - 79 - return 0; 80 - } 81 - 82 - static int tree_insert_bytes(struct rb_root *root, u64 bytes, 83 - struct rb_node *node) 84 - { 85 - struct rb_node **p = &root->rb_node; 86 - struct rb_node *parent = NULL; 87 - struct btrfs_free_space *info; 88 - 89 - while (*p) { 90 - parent = *p; 91 - info = rb_entry(parent, struct btrfs_free_space, bytes_index); 92 - 93 - if (bytes < info->bytes) 94 - p = &(*p)->rb_left; 95 - else 96 - p = &(*p)->rb_right; 46 + } else { 47 + /* 48 + * we could have a bitmap entry and an extent entry 49 + * share the same offset. If this is the case, we want 50 + * the extent entry to always be found first if we do a 51 + * linear search through the tree, since we want to have 52 + * the quickest allocation time, and allocating from an 53 + * extent is faster than allocating from a bitmap. So 54 + * if we're inserting a bitmap and we find an entry at 55 + * this offset, we want to go right, or after this entry 56 + * logically. If we are inserting an extent and we've 57 + * found a bitmap, we want to go left, or before 58 + * logically. 59 + */ 60 + if (bitmap) { 61 + WARN_ON(info->bitmap); 62 + p = &(*p)->rb_right; 63 + } else { 64 + WARN_ON(!info->bitmap); 65 + p = &(*p)->rb_left; 66 + } 67 + } 97 68 } 98 69 99 70 rb_link_node(node, parent, p); ··· 102 79 /* 103 80 * searches the tree for the given offset. 104 81 * 105 - * fuzzy == 1: this is used for allocations where we are given a hint of where 106 - * to look for free space. 
Because the hint may not be completely on an offset 107 - * mark, or the hint may no longer point to free space we need to fudge our 108 - * results a bit. So we look for free space starting at or after offset with at 109 - * least bytes size. We prefer to find as close to the given offset as we can. 110 - * Also if the offset is within a free space range, then we will return the free 111 - * space that contains the given offset, which means we can return a free space 112 - * chunk with an offset before the provided offset. 113 - * 114 - * fuzzy == 0: this is just a normal tree search. Give us the free space that 115 - * starts at the given offset which is at least bytes size, and if its not there 116 - * return NULL. 82 + * fuzzy - If this is set, then we are trying to make an allocation, and we just 83 + * want a section that has at least bytes size and comes at or after the given 84 + * offset. 117 85 */ 118 - static struct btrfs_free_space *tree_search_offset(struct rb_root *root, 119 - u64 offset, u64 bytes, 120 - int fuzzy) 86 + static struct btrfs_free_space * 87 + tree_search_offset(struct btrfs_block_group_cache *block_group, 88 + u64 offset, int bitmap_only, int fuzzy) 121 89 { 122 - struct rb_node *n = root->rb_node; 123 - struct btrfs_free_space *entry, *ret = NULL; 90 + struct rb_node *n = block_group->free_space_offset.rb_node; 91 + struct btrfs_free_space *entry, *prev = NULL; 124 92 125 - while (n) { 126 - entry = rb_entry(n, struct btrfs_free_space, offset_index); 127 - 128 - if (offset < entry->offset) { 129 - if (fuzzy && 130 - (!ret || entry->offset < ret->offset) && 131 - (bytes <= entry->bytes)) 132 - ret = entry; 133 - n = n->rb_left; 134 - } else if (offset > entry->offset) { 135 - if (fuzzy && 136 - (entry->offset + entry->bytes - 1) >= offset && 137 - bytes <= entry->bytes) { 138 - ret = entry; 139 - break; 140 - } 141 - n = n->rb_right; 142 - } else { 143 - if (bytes > entry->bytes) { 144 - n = n->rb_right; 145 - continue; 146 - } 147 - ret = entry; 93 + /* find entry that is closest to the 'offset' */ 94 + while (1) { 95 + if (!n) { 96 + entry = NULL; 148 97 break; 149 98 } 99 + 100 + entry = rb_entry(n, struct btrfs_free_space, offset_index); 101 + prev = entry; 102 + 103 + if (offset < entry->offset) 104 + n = n->rb_left; 105 + else if (offset > entry->offset) 106 + n = n->rb_right; 107 + else 108 + break; 150 109 } 151 110 152 - return ret; 153 - } 111 + if (bitmap_only) { 112 + if (!entry) 113 + return NULL; 114 + if (entry->bitmap) 115 + return entry; 154 116 155 - /* 156 - * return a chunk at least bytes size, as close to offset that we can get. 157 - */ 158 - static struct btrfs_free_space *tree_search_bytes(struct rb_root *root, 159 - u64 offset, u64 bytes) 160 - { 161 - struct rb_node *n = root->rb_node; 162 - struct btrfs_free_space *entry, *ret = NULL; 117 + /* 118 + * bitmap entry and extent entry may share same offset, 119 + * in that case, bitmap entry comes after extent entry. 
120 + */ 121 + n = rb_next(n); 122 + if (!n) 123 + return NULL; 124 + entry = rb_entry(n, struct btrfs_free_space, offset_index); 125 + if (entry->offset != offset) 126 + return NULL; 163 127 164 - while (n) { 165 - entry = rb_entry(n, struct btrfs_free_space, bytes_index); 166 - 167 - if (bytes < entry->bytes) { 128 + WARN_ON(!entry->bitmap); 129 + return entry; 130 + } else if (entry) { 131 + if (entry->bitmap) { 168 132 /* 169 - * We prefer to get a hole size as close to the size we 170 - * are asking for so we don't take small slivers out of 171 - * huge holes, but we also want to get as close to the 172 - * offset as possible so we don't have a whole lot of 173 - * fragmentation. 133 + * if previous extent entry covers the offset, 134 + * we should return it instead of the bitmap entry 174 135 */ 175 - if (offset <= entry->offset) { 176 - if (!ret) 177 - ret = entry; 178 - else if (entry->bytes < ret->bytes) 179 - ret = entry; 180 - else if (entry->offset < ret->offset) 181 - ret = entry; 136 + n = &entry->offset_index; 137 + while (1) { 138 + n = rb_prev(n); 139 + if (!n) 140 + break; 141 + prev = rb_entry(n, struct btrfs_free_space, 142 + offset_index); 143 + if (!prev->bitmap) { 144 + if (prev->offset + prev->bytes > offset) 145 + entry = prev; 146 + break; 147 + } 182 148 } 183 - n = n->rb_left; 184 - } else if (bytes > entry->bytes) { 185 - n = n->rb_right; 149 + } 150 + return entry; 151 + } 152 + 153 + if (!prev) 154 + return NULL; 155 + 156 + /* find last entry before the 'offset' */ 157 + entry = prev; 158 + if (entry->offset > offset) { 159 + n = rb_prev(&entry->offset_index); 160 + if (n) { 161 + entry = rb_entry(n, struct btrfs_free_space, 162 + offset_index); 163 + BUG_ON(entry->offset > offset); 186 164 } else { 187 - /* 188 - * Ok we may have multiple chunks of the wanted size, 189 - * so we don't want to take the first one we find, we 190 - * want to take the one closest to our given offset, so 191 - * keep searching just in case theres a better match. 
192 - */ 193 - n = n->rb_right; 194 - if (offset > entry->offset) 195 - continue; 196 - else if (!ret || entry->offset < ret->offset) 197 - ret = entry; 165 + if (fuzzy) 166 + return entry; 167 + else 168 + return NULL; 198 169 } 199 170 } 200 171 201 - return ret; 172 + if (entry->bitmap) { 173 + n = &entry->offset_index; 174 + while (1) { 175 + n = rb_prev(n); 176 + if (!n) 177 + break; 178 + prev = rb_entry(n, struct btrfs_free_space, 179 + offset_index); 180 + if (!prev->bitmap) { 181 + if (prev->offset + prev->bytes > offset) 182 + return prev; 183 + break; 184 + } 185 + } 186 + if (entry->offset + BITS_PER_BITMAP * 187 + block_group->sectorsize > offset) 188 + return entry; 189 + } else if (entry->offset + entry->bytes > offset) 190 + return entry; 191 + 192 + if (!fuzzy) 193 + return NULL; 194 + 195 + while (1) { 196 + if (entry->bitmap) { 197 + if (entry->offset + BITS_PER_BITMAP * 198 + block_group->sectorsize > offset) 199 + break; 200 + } else { 201 + if (entry->offset + entry->bytes > offset) 202 + break; 203 + } 204 + 205 + n = rb_next(&entry->offset_index); 206 + if (!n) 207 + return NULL; 208 + entry = rb_entry(n, struct btrfs_free_space, offset_index); 209 + } 210 + return entry; 202 211 } 203 212 204 213 static void unlink_free_space(struct btrfs_block_group_cache *block_group, 205 214 struct btrfs_free_space *info) 206 215 { 207 216 rb_erase(&info->offset_index, &block_group->free_space_offset); 208 - rb_erase(&info->bytes_index, &block_group->free_space_bytes); 217 + block_group->free_extents--; 218 + block_group->free_space -= info->bytes; 209 219 } 210 220 211 221 static int link_free_space(struct btrfs_block_group_cache *block_group, ··· 246 190 { 247 191 int ret = 0; 248 192 249 - 250 - BUG_ON(!info->bytes); 193 + BUG_ON(!info->bitmap && !info->bytes); 251 194 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 252 - &info->offset_index); 195 + &info->offset_index, (info->bitmap != NULL)); 253 196 if (ret) 254 197 return ret; 255 198 256 - ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes, 257 - &info->bytes_index); 258 - if (ret) 259 - return ret; 199 + block_group->free_space += info->bytes; 200 + block_group->free_extents++; 201 + return ret; 202 + } 203 + 204 + static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 205 + { 206 + u64 max_bytes, possible_bytes; 207 + 208 + /* 209 + * The goal is to keep the total amount of memory used per 1gb of space 210 + * at or below 32k, so we need to adjust how much memory we allow to be 211 + * used by extent based free space tracking 212 + */ 213 + max_bytes = MAX_CACHE_BYTES_PER_GIG * 214 + (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); 215 + 216 + possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + 217 + (sizeof(struct btrfs_free_space) * 218 + block_group->extents_thresh); 219 + 220 + if (possible_bytes > max_bytes) { 221 + int extent_bytes = max_bytes - 222 + (block_group->total_bitmaps * PAGE_CACHE_SIZE); 223 + 224 + if (extent_bytes <= 0) { 225 + block_group->extents_thresh = 0; 226 + return; 227 + } 228 + 229 + block_group->extents_thresh = extent_bytes / 230 + (sizeof(struct btrfs_free_space)); 231 + } 232 + } 233 + 234 + static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 235 + struct btrfs_free_space *info, u64 offset, 236 + u64 bytes) 237 + { 238 + unsigned long start, end; 239 + unsigned long i; 240 + 241 + start = offset_to_bit(info->offset, block_group->sectorsize, offset); 242 + end = start + 
bytes_to_bits(bytes, block_group->sectorsize); 243 + BUG_ON(end > BITS_PER_BITMAP); 244 + 245 + for (i = start; i < end; i++) 246 + clear_bit(i, info->bitmap); 247 + 248 + info->bytes -= bytes; 249 + block_group->free_space -= bytes; 250 + } 251 + 252 + static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, 253 + struct btrfs_free_space *info, u64 offset, 254 + u64 bytes) 255 + { 256 + unsigned long start, end; 257 + unsigned long i; 258 + 259 + start = offset_to_bit(info->offset, block_group->sectorsize, offset); 260 + end = start + bytes_to_bits(bytes, block_group->sectorsize); 261 + BUG_ON(end > BITS_PER_BITMAP); 262 + 263 + for (i = start; i < end; i++) 264 + set_bit(i, info->bitmap); 265 + 266 + info->bytes += bytes; 267 + block_group->free_space += bytes; 268 + } 269 + 270 + static int search_bitmap(struct btrfs_block_group_cache *block_group, 271 + struct btrfs_free_space *bitmap_info, u64 *offset, 272 + u64 *bytes) 273 + { 274 + unsigned long found_bits = 0; 275 + unsigned long bits, i; 276 + unsigned long next_zero; 277 + 278 + i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, 279 + max_t(u64, *offset, bitmap_info->offset)); 280 + bits = bytes_to_bits(*bytes, block_group->sectorsize); 281 + 282 + for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 283 + i < BITS_PER_BITMAP; 284 + i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { 285 + next_zero = find_next_zero_bit(bitmap_info->bitmap, 286 + BITS_PER_BITMAP, i); 287 + if ((next_zero - i) >= bits) { 288 + found_bits = next_zero - i; 289 + break; 290 + } 291 + i = next_zero; 292 + } 293 + 294 + if (found_bits) { 295 + *offset = (u64)(i * block_group->sectorsize) + 296 + bitmap_info->offset; 297 + *bytes = (u64)(found_bits) * block_group->sectorsize; 298 + return 0; 299 + } 300 + 301 + return -1; 302 + } 303 + 304 + static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache 305 + *block_group, u64 *offset, 306 + u64 *bytes, int debug) 307 + { 308 + struct btrfs_free_space *entry; 309 + struct rb_node *node; 310 + int ret; 311 + 312 + if (!block_group->free_space_offset.rb_node) 313 + return NULL; 314 + 315 + entry = tree_search_offset(block_group, 316 + offset_to_bitmap(block_group, *offset), 317 + 0, 1); 318 + if (!entry) 319 + return NULL; 320 + 321 + for (node = &entry->offset_index; node; node = rb_next(node)) { 322 + entry = rb_entry(node, struct btrfs_free_space, offset_index); 323 + if (entry->bytes < *bytes) 324 + continue; 325 + 326 + if (entry->bitmap) { 327 + ret = search_bitmap(block_group, entry, offset, bytes); 328 + if (!ret) 329 + return entry; 330 + continue; 331 + } 332 + 333 + *offset = entry->offset; 334 + *bytes = entry->bytes; 335 + return entry; 336 + } 337 + 338 + return NULL; 339 + } 340 + 341 + static void add_new_bitmap(struct btrfs_block_group_cache *block_group, 342 + struct btrfs_free_space *info, u64 offset) 343 + { 344 + u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 345 + int max_bitmaps = (int)div64_u64(block_group->key.offset + 346 + bytes_per_bg - 1, bytes_per_bg); 347 + BUG_ON(block_group->total_bitmaps >= max_bitmaps); 348 + 349 + info->offset = offset_to_bitmap(block_group, offset); 350 + link_free_space(block_group, info); 351 + block_group->total_bitmaps++; 352 + 353 + recalculate_thresholds(block_group); 354 + } 355 + 356 + static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 357 + struct btrfs_free_space *bitmap_info, 358 + u64 *offset, u64 *bytes) 359 + { 360 + u64 
end; 361 + 362 + again: 363 + end = bitmap_info->offset + 364 + (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; 365 + 366 + if (*offset > bitmap_info->offset && *offset + *bytes > end) { 367 + bitmap_clear_bits(block_group, bitmap_info, *offset, 368 + end - *offset + 1); 369 + *bytes -= end - *offset + 1; 370 + *offset = end + 1; 371 + } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 372 + bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); 373 + *bytes = 0; 374 + } 375 + 376 + if (*bytes) { 377 + if (!bitmap_info->bytes) { 378 + unlink_free_space(block_group, bitmap_info); 379 + kfree(bitmap_info->bitmap); 380 + kfree(bitmap_info); 381 + block_group->total_bitmaps--; 382 + recalculate_thresholds(block_group); 383 + } 384 + 385 + bitmap_info = tree_search_offset(block_group, 386 + offset_to_bitmap(block_group, 387 + *offset), 388 + 1, 0); 389 + if (!bitmap_info) 390 + return -EINVAL; 391 + 392 + if (!bitmap_info->bitmap) 393 + return -EAGAIN; 394 + 395 + goto again; 396 + } else if (!bitmap_info->bytes) { 397 + unlink_free_space(block_group, bitmap_info); 398 + kfree(bitmap_info->bitmap); 399 + kfree(bitmap_info); 400 + block_group->total_bitmaps--; 401 + recalculate_thresholds(block_group); 402 + } 403 + 404 + return 0; 405 + } 406 + 407 + static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, 408 + struct btrfs_free_space *info) 409 + { 410 + struct btrfs_free_space *bitmap_info; 411 + int added = 0; 412 + u64 bytes, offset, end; 413 + int ret; 414 + 415 + /* 416 + * If we are below the extents threshold then we can add this as an 417 + * extent, and don't have to deal with the bitmap 418 + */ 419 + if (block_group->free_extents < block_group->extents_thresh && 420 + info->bytes > block_group->sectorsize * 4) 421 + return 0; 422 + 423 + /* 424 + * some block groups are so tiny they can't be enveloped by a bitmap, so 425 + * don't even bother to create a bitmap for this 426 + */ 427 + if (BITS_PER_BITMAP * block_group->sectorsize > 428 + block_group->key.offset) 429 + return 0; 430 + 431 + bytes = info->bytes; 432 + offset = info->offset; 433 + 434 + again: 435 + bitmap_info = tree_search_offset(block_group, 436 + offset_to_bitmap(block_group, offset), 437 + 1, 0); 438 + if (!bitmap_info) { 439 + BUG_ON(added); 440 + goto new_bitmap; 441 + } 442 + 443 + end = bitmap_info->offset + 444 + (u64)(BITS_PER_BITMAP * block_group->sectorsize); 445 + 446 + if (offset >= bitmap_info->offset && offset + bytes > end) { 447 + bitmap_set_bits(block_group, bitmap_info, offset, 448 + end - offset); 449 + bytes -= end - offset; 450 + offset = end; 451 + added = 0; 452 + } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 453 + bitmap_set_bits(block_group, bitmap_info, offset, bytes); 454 + bytes = 0; 455 + } else { 456 + BUG(); 457 + } 458 + 459 + if (!bytes) { 460 + ret = 1; 461 + goto out; 462 + } else 463 + goto again; 464 + 465 + new_bitmap: 466 + if (info && info->bitmap) { 467 + add_new_bitmap(block_group, info, offset); 468 + added = 1; 469 + info = NULL; 470 + goto again; 471 + } else { 472 + spin_unlock(&block_group->tree_lock); 473 + 474 + /* no pre-allocated info, allocate a new one */ 475 + if (!info) { 476 + info = kzalloc(sizeof(struct btrfs_free_space), 477 + GFP_NOFS); 478 + if (!info) { 479 + spin_lock(&block_group->tree_lock); 480 + ret = -ENOMEM; 481 + goto out; 482 + } 483 + } 484 + 485 + /* allocate the bitmap */ 486 + info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 487 + spin_lock(&block_group->tree_lock); 
488 + if (!info->bitmap) { 489 + ret = -ENOMEM; 490 + goto out; 491 + } 492 + goto again; 493 + } 494 + 495 + out: 496 + if (info) { 497 + if (info->bitmap) 498 + kfree(info->bitmap); 499 + kfree(info); 500 + } 260 501 261 502 return ret; 262 503 } ··· 561 208 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 562 209 u64 offset, u64 bytes) 563 210 { 564 - struct btrfs_free_space *right_info; 565 - struct btrfs_free_space *left_info; 211 + struct btrfs_free_space *right_info = NULL; 212 + struct btrfs_free_space *left_info = NULL; 566 213 struct btrfs_free_space *info = NULL; 567 214 int ret = 0; 568 215 ··· 580 227 * are adding, if there is remove that struct and add a new one to 581 228 * cover the entire range 582 229 */ 583 - right_info = tree_search_offset(&block_group->free_space_offset, 584 - offset+bytes, 0, 0); 585 - left_info = tree_search_offset(&block_group->free_space_offset, 586 - offset-1, 0, 1); 230 + right_info = tree_search_offset(block_group, offset + bytes, 0, 0); 231 + if (right_info && rb_prev(&right_info->offset_index)) 232 + left_info = rb_entry(rb_prev(&right_info->offset_index), 233 + struct btrfs_free_space, offset_index); 234 + else 235 + left_info = tree_search_offset(block_group, offset - 1, 0, 0); 587 236 588 - if (right_info) { 237 + /* 238 + * If there was no extent directly to the left or right of this new 239 + * extent then we know we're going to have to allocate a new extent, so 240 + * before we do that see if we need to drop this into a bitmap 241 + */ 242 + if ((!left_info || left_info->bitmap) && 243 + (!right_info || right_info->bitmap)) { 244 + ret = insert_into_bitmap(block_group, info); 245 + 246 + if (ret < 0) { 247 + goto out; 248 + } else if (ret) { 249 + ret = 0; 250 + goto out; 251 + } 252 + } 253 + 254 + if (right_info && !right_info->bitmap) { 589 255 unlink_free_space(block_group, right_info); 590 256 info->bytes += right_info->bytes; 591 257 kfree(right_info); 592 258 } 593 259 594 - if (left_info && left_info->offset + left_info->bytes == offset) { 260 + if (left_info && !left_info->bitmap && 261 + left_info->offset + left_info->bytes == offset) { 595 262 unlink_free_space(block_group, left_info); 596 263 info->offset = left_info->offset; 597 264 info->bytes += left_info->bytes; ··· 621 248 ret = link_free_space(block_group, info); 622 249 if (ret) 623 250 kfree(info); 624 - 251 + out: 625 252 spin_unlock(&block_group->tree_lock); 626 253 627 254 if (ret) { 628 - printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); 255 + printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 629 256 BUG_ON(ret == -EEXIST); 630 257 } 631 258 ··· 636 263 u64 offset, u64 bytes) 637 264 { 638 265 struct btrfs_free_space *info; 266 + struct btrfs_free_space *next_info = NULL; 639 267 int ret = 0; 640 268 641 269 spin_lock(&block_group->tree_lock); 642 270 643 - info = tree_search_offset(&block_group->free_space_offset, offset, 0, 644 - 1); 645 - if (info && info->offset == offset) { 646 - if (info->bytes < bytes) { 647 - printk(KERN_ERR "Found free space at %llu, size %llu," 648 - "trying to use %llu\n", 649 - (unsigned long long)info->offset, 650 - (unsigned long long)info->bytes, 651 - (unsigned long long)bytes); 271 + again: 272 + info = tree_search_offset(block_group, offset, 0, 0); 273 + if (!info) { 274 + WARN_ON(1); 275 + goto out_lock; 276 + } 277 + 278 + if (info->bytes < bytes && rb_next(&info->offset_index)) { 279 + u64 end; 280 + next_info = rb_entry(rb_next(&info->offset_index), 281 + struct 
btrfs_free_space, 282 + offset_index); 283 + 284 + if (next_info->bitmap) 285 + end = next_info->offset + BITS_PER_BITMAP * 286 + block_group->sectorsize - 1; 287 + else 288 + end = next_info->offset + next_info->bytes; 289 + 290 + if (next_info->bytes < bytes || 291 + next_info->offset > offset || offset > end) { 292 + printk(KERN_CRIT "Found free space at %llu, size %llu," 293 + " trying to use %llu\n", 294 + (unsigned long long)info->offset, 295 + (unsigned long long)info->bytes, 296 + (unsigned long long)bytes); 652 297 WARN_ON(1); 653 298 ret = -EINVAL; 654 - spin_unlock(&block_group->tree_lock); 655 - goto out; 299 + goto out_lock; 656 300 } 301 + 302 + info = next_info; 303 + } 304 + 305 + if (info->bytes == bytes) { 657 306 unlink_free_space(block_group, info); 658 - 659 - if (info->bytes == bytes) { 660 - kfree(info); 661 - spin_unlock(&block_group->tree_lock); 662 - goto out; 307 + if (info->bitmap) { 308 + kfree(info->bitmap); 309 + block_group->total_bitmaps--; 663 310 } 311 + kfree(info); 312 + goto out_lock; 313 + } 664 314 315 + if (!info->bitmap && info->offset == offset) { 316 + unlink_free_space(block_group, info); 665 317 info->offset += bytes; 666 318 info->bytes -= bytes; 319 + link_free_space(block_group, info); 320 + goto out_lock; 321 + } 667 322 668 - ret = link_free_space(block_group, info); 669 - spin_unlock(&block_group->tree_lock); 670 - BUG_ON(ret); 671 - } else if (info && info->offset < offset && 672 - info->offset + info->bytes >= offset + bytes) { 323 + if (!info->bitmap && info->offset <= offset && 324 + info->offset + info->bytes >= offset + bytes) { 673 325 u64 old_start = info->offset; 674 326 /* 675 327 * we're freeing space in the middle of the info, ··· 710 312 info->offset = offset + bytes; 711 313 info->bytes = old_end - info->offset; 712 314 ret = link_free_space(block_group, info); 713 - BUG_ON(ret); 315 + WARN_ON(ret); 316 + if (ret) 317 + goto out_lock; 714 318 } else { 715 319 /* the hole we're creating ends at the end 716 320 * of the info struct, just free the info ··· 720 320 kfree(info); 721 321 } 722 322 spin_unlock(&block_group->tree_lock); 723 - /* step two, insert a new info struct to cover anything 724 - * before the hole 323 + 324 + /* step two, insert a new info struct to cover 325 + * anything before the hole 725 326 */ 726 327 ret = btrfs_add_free_space(block_group, old_start, 727 328 offset - old_start); 728 - BUG_ON(ret); 729 - } else { 730 - spin_unlock(&block_group->tree_lock); 731 - if (!info) { 732 - printk(KERN_ERR "couldn't find space %llu to free\n", 733 - (unsigned long long)offset); 734 - printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", 735 - block_group->cached, 736 - (unsigned long long)block_group->key.objectid, 737 - (unsigned long long)block_group->key.offset); 738 - btrfs_dump_free_space(block_group, bytes); 739 - } else if (info) { 740 - printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " 741 - "but wanted offset=%llu bytes=%llu\n", 742 - (unsigned long long)info->offset, 743 - (unsigned long long)info->bytes, 744 - (unsigned long long)offset, 745 - (unsigned long long)bytes); 746 - } 747 - WARN_ON(1); 329 + WARN_ON(ret); 330 + goto out; 748 331 } 332 + 333 + ret = remove_from_bitmap(block_group, info, &offset, &bytes); 334 + if (ret == -EAGAIN) 335 + goto again; 336 + BUG_ON(ret); 337 + out_lock: 338 + spin_unlock(&block_group->tree_lock); 749 339 out: 750 340 return ret; 751 341 } ··· 751 361 info = rb_entry(n, struct btrfs_free_space, offset_index); 752 362 if (info->bytes >= bytes) 753 363 
count++; 754 - printk(KERN_ERR "entry offset %llu, bytes %llu\n", 364 + printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 755 365 (unsigned long long)info->offset, 756 - (unsigned long long)info->bytes); 366 + (unsigned long long)info->bytes, 367 + (info->bitmap) ? "yes" : "no"); 757 368 } 369 + printk(KERN_INFO "block group has cluster?: %s\n", 370 + list_empty(&block_group->cluster_list) ? "no" : "yes"); 758 371 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 759 372 "\n", count); 760 373 } ··· 790 397 { 791 398 struct btrfs_free_space *entry; 792 399 struct rb_node *node; 400 + bool bitmap; 793 401 794 402 spin_lock(&cluster->lock); 795 403 if (cluster->block_group != block_group) 796 404 goto out; 797 405 406 + bitmap = cluster->points_to_bitmap; 407 + cluster->block_group = NULL; 798 408 cluster->window_start = 0; 409 + list_del_init(&cluster->block_group_list); 410 + cluster->points_to_bitmap = false; 411 + 412 + if (bitmap) 413 + goto out; 414 + 799 415 node = rb_first(&cluster->root); 800 - while(node) { 416 + while (node) { 801 417 entry = rb_entry(node, struct btrfs_free_space, offset_index); 802 418 node = rb_next(&entry->offset_index); 803 419 rb_erase(&entry->offset_index, &cluster->root); 804 - link_free_space(block_group, entry); 420 + BUG_ON(entry->bitmap); 421 + tree_insert_offset(&block_group->free_space_offset, 422 + entry->offset, &entry->offset_index, 0); 805 423 } 806 - list_del_init(&cluster->block_group_list); 807 - 808 - btrfs_put_block_group(cluster->block_group); 809 - cluster->block_group = NULL; 810 424 cluster->root.rb_node = NULL; 425 + 811 426 out: 812 427 spin_unlock(&cluster->lock); 428 + btrfs_put_block_group(block_group); 813 429 return 0; 814 430 } 815 431 ··· 827 425 struct btrfs_free_space *info; 828 426 struct rb_node *node; 829 427 struct btrfs_free_cluster *cluster; 830 - struct btrfs_free_cluster *safe; 428 + struct list_head *head; 831 429 832 430 spin_lock(&block_group->tree_lock); 833 - 834 - list_for_each_entry_safe(cluster, safe, &block_group->cluster_list, 835 - block_group_list) { 431 + while ((head = block_group->cluster_list.next) != 432 + &block_group->cluster_list) { 433 + cluster = list_entry(head, struct btrfs_free_cluster, 434 + block_group_list); 836 435 837 436 WARN_ON(cluster->block_group != block_group); 838 437 __btrfs_return_cluster_to_free_space(block_group, cluster); 438 + if (need_resched()) { 439 + spin_unlock(&block_group->tree_lock); 440 + cond_resched(); 441 + spin_lock(&block_group->tree_lock); 442 + } 839 443 } 840 444 841 - while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { 842 - info = rb_entry(node, struct btrfs_free_space, bytes_index); 445 + while ((node = rb_last(&block_group->free_space_offset)) != NULL) { 446 + info = rb_entry(node, struct btrfs_free_space, offset_index); 843 447 unlink_free_space(block_group, info); 448 + if (info->bitmap) 449 + kfree(info->bitmap); 844 450 kfree(info); 845 451 if (need_resched()) { 846 452 spin_unlock(&block_group->tree_lock); ··· 856 446 spin_lock(&block_group->tree_lock); 857 447 } 858 448 } 449 + 859 450 spin_unlock(&block_group->tree_lock); 860 451 } 861 452 ··· 864 453 u64 offset, u64 bytes, u64 empty_size) 865 454 { 866 455 struct btrfs_free_space *entry = NULL; 456 + u64 bytes_search = bytes + empty_size; 867 457 u64 ret = 0; 868 458 869 459 spin_lock(&block_group->tree_lock); 870 - entry = tree_search_offset(&block_group->free_space_offset, offset, 871 - bytes + empty_size, 1); 460 + entry = 
find_free_space(block_group, &offset, &bytes_search, 0); 872 461 if (!entry) 873 - entry = tree_search_bytes(&block_group->free_space_bytes, 874 - offset, bytes + empty_size); 875 - if (entry) { 462 + goto out; 463 + 464 + ret = offset; 465 + if (entry->bitmap) { 466 + bitmap_clear_bits(block_group, entry, offset, bytes); 467 + if (!entry->bytes) { 468 + unlink_free_space(block_group, entry); 469 + kfree(entry->bitmap); 470 + kfree(entry); 471 + block_group->total_bitmaps--; 472 + recalculate_thresholds(block_group); 473 + } 474 + } else { 876 475 unlink_free_space(block_group, entry); 877 - ret = entry->offset; 878 476 entry->offset += bytes; 879 477 entry->bytes -= bytes; 880 - 881 478 if (!entry->bytes) 882 479 kfree(entry); 883 480 else 884 481 link_free_space(block_group, entry); 885 482 } 483 + 484 + out: 886 485 spin_unlock(&block_group->tree_lock); 887 486 888 487 return ret; ··· 938 517 return ret; 939 518 } 940 519 520 + static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 521 + struct btrfs_free_cluster *cluster, 522 + u64 bytes, u64 min_start) 523 + { 524 + struct btrfs_free_space *entry; 525 + int err; 526 + u64 search_start = cluster->window_start; 527 + u64 search_bytes = bytes; 528 + u64 ret = 0; 529 + 530 + spin_lock(&block_group->tree_lock); 531 + spin_lock(&cluster->lock); 532 + 533 + if (!cluster->points_to_bitmap) 534 + goto out; 535 + 536 + if (cluster->block_group != block_group) 537 + goto out; 538 + 539 + entry = tree_search_offset(block_group, search_start, 0, 0); 540 + 541 + if (!entry || !entry->bitmap) 542 + goto out; 543 + 544 + search_start = min_start; 545 + search_bytes = bytes; 546 + 547 + err = search_bitmap(block_group, entry, &search_start, 548 + &search_bytes); 549 + if (err) 550 + goto out; 551 + 552 + ret = search_start; 553 + bitmap_clear_bits(block_group, entry, ret, bytes); 554 + out: 555 + spin_unlock(&cluster->lock); 556 + spin_unlock(&block_group->tree_lock); 557 + 558 + return ret; 559 + } 560 + 941 561 /* 942 562 * given a cluster, try to allocate 'bytes' from it, returns 0 943 563 * if it couldn't find anything suitably large, or a logical disk offset ··· 991 529 struct btrfs_free_space *entry = NULL; 992 530 struct rb_node *node; 993 531 u64 ret = 0; 532 + 533 + if (cluster->points_to_bitmap) 534 + return btrfs_alloc_from_bitmap(block_group, cluster, bytes, 535 + min_start); 994 536 995 537 spin_lock(&cluster->lock); 996 538 if (bytes > cluster->max_size) ··· 1033 567 } 1034 568 out: 1035 569 spin_unlock(&cluster->lock); 570 + 1036 571 return ret; 572 + } 573 + 574 + static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 575 + struct btrfs_free_space *entry, 576 + struct btrfs_free_cluster *cluster, 577 + u64 offset, u64 bytes, u64 min_bytes) 578 + { 579 + unsigned long next_zero; 580 + unsigned long i; 581 + unsigned long search_bits; 582 + unsigned long total_bits; 583 + unsigned long found_bits; 584 + unsigned long start = 0; 585 + unsigned long total_found = 0; 586 + bool found = false; 587 + 588 + i = offset_to_bit(entry->offset, block_group->sectorsize, 589 + max_t(u64, offset, entry->offset)); 590 + search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); 591 + total_bits = bytes_to_bits(bytes, block_group->sectorsize); 592 + 593 + again: 594 + found_bits = 0; 595 + for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); 596 + i < BITS_PER_BITMAP; 597 + i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { 598 + next_zero = find_next_zero_bit(entry->bitmap, 599 + 
BITS_PER_BITMAP, i); 600 + if (next_zero - i >= search_bits) { 601 + found_bits = next_zero - i; 602 + break; 603 + } 604 + i = next_zero; 605 + } 606 + 607 + if (!found_bits) 608 + return -1; 609 + 610 + if (!found) { 611 + start = i; 612 + found = true; 613 + } 614 + 615 + total_found += found_bits; 616 + 617 + if (cluster->max_size < found_bits * block_group->sectorsize) 618 + cluster->max_size = found_bits * block_group->sectorsize; 619 + 620 + if (total_found < total_bits) { 621 + i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); 622 + if (i - start > total_bits * 2) { 623 + total_found = 0; 624 + cluster->max_size = 0; 625 + found = false; 626 + } 627 + goto again; 628 + } 629 + 630 + cluster->window_start = start * block_group->sectorsize + 631 + entry->offset; 632 + cluster->points_to_bitmap = true; 633 + 634 + return 0; 1037 635 } 1038 636 1039 637 /* ··· 1117 587 struct btrfs_free_space *entry = NULL; 1118 588 struct rb_node *node; 1119 589 struct btrfs_free_space *next; 1120 - struct btrfs_free_space *last; 590 + struct btrfs_free_space *last = NULL; 1121 591 u64 min_bytes; 1122 592 u64 window_start; 1123 593 u64 window_free; 1124 594 u64 max_extent = 0; 1125 - int total_retries = 0; 595 + bool found_bitmap = false; 1126 596 int ret; 1127 597 1128 598 /* for metadata, allow allocates with more holes */ ··· 1150 620 goto out; 1151 621 } 1152 622 again: 1153 - min_bytes = min(min_bytes, bytes + empty_size); 1154 - entry = tree_search_bytes(&block_group->free_space_bytes, 1155 - offset, min_bytes); 623 + entry = tree_search_offset(block_group, offset, found_bitmap, 1); 1156 624 if (!entry) { 1157 625 ret = -ENOSPC; 1158 626 goto out; 1159 627 } 628 + 629 + /* 630 + * If found_bitmap is true, we exhausted our search for extent entries, 631 + * and we just want to search all of the bitmaps that we can find, and 632 + * ignore any extent entries we find. 
633 + */ 634 + while (entry->bitmap || found_bitmap || 635 + (!entry->bitmap && entry->bytes < min_bytes)) { 636 + struct rb_node *node = rb_next(&entry->offset_index); 637 + 638 + if (entry->bitmap && entry->bytes > bytes + empty_size) { 639 + ret = btrfs_bitmap_cluster(block_group, entry, cluster, 640 + offset, bytes + empty_size, 641 + min_bytes); 642 + if (!ret) 643 + goto got_it; 644 + } 645 + 646 + if (!node) { 647 + ret = -ENOSPC; 648 + goto out; 649 + } 650 + entry = rb_entry(node, struct btrfs_free_space, offset_index); 651 + } 652 + 653 + /* 654 + * We already searched all the extent entries from the passed in offset 655 + * to the end and didn't find enough space for the cluster, and we also 656 + * didn't find any bitmaps that met our criteria, just go ahead and exit 657 + */ 658 + if (found_bitmap) { 659 + ret = -ENOSPC; 660 + goto out; 661 + } 662 + 663 + cluster->points_to_bitmap = false; 1160 664 window_start = entry->offset; 1161 665 window_free = entry->bytes; 1162 666 last = entry; 1163 667 max_extent = entry->bytes; 1164 668 1165 - while(1) { 669 + while (1) { 1166 670 /* out window is just right, lets fill it */ 1167 671 if (window_free >= bytes + empty_size) 1168 672 break; 1169 673 1170 674 node = rb_next(&last->offset_index); 1171 675 if (!node) { 676 + if (found_bitmap) 677 + goto again; 1172 678 ret = -ENOSPC; 1173 679 goto out; 1174 680 } 1175 681 next = rb_entry(node, struct btrfs_free_space, offset_index); 682 + 683 + /* 684 + * we found a bitmap, so if this search doesn't result in a 685 + * cluster, we know to go and search again for the bitmaps and 686 + * start looking for space there 687 + */ 688 + if (next->bitmap) { 689 + if (!found_bitmap) 690 + offset = next->offset; 691 + found_bitmap = true; 692 + last = next; 693 + continue; 694 + } 1176 695 1177 696 /* 1178 697 * we haven't filled the empty size and the window is ··· 1234 655 window_free = entry->bytes; 1235 656 last = entry; 1236 657 max_extent = 0; 1237 - total_retries++; 1238 - if (total_retries % 64 == 0) { 1239 - if (min_bytes >= (bytes + empty_size)) { 1240 - ret = -ENOSPC; 1241 - goto out; 1242 - } 1243 - /* 1244 - * grow our allocation a bit, we're not having 1245 - * much luck 1246 - */ 1247 - min_bytes *= 2; 1248 - goto again; 1249 - } 1250 658 } else { 1251 659 last = next; 1252 660 window_free += next->bytes; ··· 1251 685 * The cluster includes an rbtree, but only uses the offset index 1252 686 * of each free space cache entry. 
1253 687 */ 1254 - while(1) { 688 + while (1) { 1255 689 node = rb_next(&entry->offset_index); 1256 - unlink_free_space(block_group, entry); 690 + if (entry->bitmap && node) { 691 + entry = rb_entry(node, struct btrfs_free_space, 692 + offset_index); 693 + continue; 694 + } else if (entry->bitmap && !node) { 695 + break; 696 + } 697 + 698 + rb_erase(&entry->offset_index, &block_group->free_space_offset); 1257 699 ret = tree_insert_offset(&cluster->root, entry->offset, 1258 - &entry->offset_index); 700 + &entry->offset_index, 0); 1259 701 BUG_ON(ret); 1260 702 1261 703 if (!node || entry == last) ··· 1271 697 1272 698 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1273 699 } 1274 - ret = 0; 700 + 1275 701 cluster->max_size = max_extent; 702 + got_it: 703 + ret = 0; 1276 704 atomic_inc(&block_group->count); 1277 705 list_add_tail(&cluster->block_group_list, &block_group->cluster_list); 1278 706 cluster->block_group = block_group; ··· 1294 718 spin_lock_init(&cluster->refill_lock); 1295 719 cluster->root.rb_node = NULL; 1296 720 cluster->max_size = 0; 721 + cluster->points_to_bitmap = false; 1297 722 INIT_LIST_HEAD(&cluster->block_group_list); 1298 723 cluster->block_group = NULL; 1299 724 }
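The free-space-cache.c changes above move crowded block groups from one rb-tree entry per free extent to page-sized bitmaps, one bit per sector, each covering BITS_PER_BITMAP * sectorsize bytes. A rough userspace sketch of the bit arithmetic behind search_bitmap() and btrfs_bitmap_cluster() follows; the 4K sector size, the helper names and the fixed map size are assumptions made for the example, not kernel interfaces.

/*
 * Illustrative userspace sketch only: how a run of set bits in a page-sized
 * free-space bitmap maps back to a byte range, in the spirit of
 * search_bitmap()/btrfs_bitmap_cluster() above.  The 4K sector size, the
 * helper names and the fixed map size are assumptions for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE      4096ULL
#define BITS_PER_BITMAP (4096 * 8)              /* one page worth of bits */

static int test_bit_ul(const unsigned long *map, unsigned long i)
{
        return (map[i / (8 * sizeof(long))] >> (i % (8 * sizeof(long)))) & 1UL;
}

/*
 * Find the first run of set bits covering at least 'bytes' of space.
 * 'base' is the byte offset bit 0 represents, like the offset field of a
 * bitmap entry.  Returns the byte offset of the run or (uint64_t)-1.
 */
static uint64_t find_run(const unsigned long *map, uint64_t base,
                         uint64_t bytes)
{
        uint64_t need = (bytes + SECTORSIZE - 1) / SECTORSIZE;
        uint64_t run = 0;
        unsigned long i, start = 0;

        for (i = 0; i < BITS_PER_BITMAP; i++) {
                if (!test_bit_ul(map, i)) {
                        run = 0;
                        continue;
                }
                if (!run)
                        start = i;
                if (++run >= need)
                        return base + (uint64_t)start * SECTORSIZE;
        }
        return (uint64_t)-1;
}

int main(void)
{
        unsigned long map[BITS_PER_BITMAP / (8 * sizeof(long))] = { 0 };
        int i;

        /* mark sectors 10..17 free: 32K starting at byte 40960 */
        for (i = 10; i < 18; i++)
                map[i / (8 * sizeof(long))] |= 1UL << (i % (8 * sizeof(long)));

        printf("run found at %llu\n",
               (unsigned long long)find_run(map, 0, 16 * 1024));
        return 0;
}
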
+8
fs/btrfs/free-space-cache.h
··· 19 19 #ifndef __BTRFS_FREE_SPACE_CACHE 20 20 #define __BTRFS_FREE_SPACE_CACHE 21 21 22 + struct btrfs_free_space { 23 + struct rb_node offset_index; 24 + u64 offset; 25 + u64 bytes; 26 + unsigned long *bitmap; 27 + struct list_head list; 28 + }; 29 + 22 30 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 23 31 u64 bytenr, u64 size); 24 32 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
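The header change records the new shape of a free-space entry: with bitmap == NULL it is an ordinary extent covering offset through offset + bytes, while a bitmap entry governs a fixed BITS_PER_BITMAP * sectorsize window starting at offset, with bytes counting only the space whose bits are set. A minimal sketch of that dual interpretation; the struct and helper names below are invented for illustration and only loosely mirror the end calculation used in btrfs_remove_free_space() above.

#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE      4096ULL
#define BITS_PER_BITMAP (4096 * 8)

/* invented stand-in for struct btrfs_free_space, for illustration only */
struct fs_entry {
        uint64_t offset;         /* start of the range this entry tracks   */
        uint64_t bytes;          /* free bytes accounted to this entry     */
        unsigned long *bitmap;   /* NULL => plain extent entry             */
};

/*
 * First byte past the region governed by the entry.  A bitmap entry always
 * spans a fixed BITS_PER_BITMAP * sectorsize window no matter how many bits
 * are set; a plain extent spans exactly 'bytes'.
 */
static uint64_t entry_end(const struct fs_entry *e)
{
        if (e->bitmap)
                return e->offset + (uint64_t)BITS_PER_BITMAP * SECTORSIZE;
        return e->offset + e->bytes;
}

int main(void)
{
        unsigned long bits[BITS_PER_BITMAP / (8 * sizeof(long))] = { 0 };
        struct fs_entry ext_entry = { .offset = 1 << 20, .bytes = 64 * 1024,
                                      .bitmap = NULL };
        struct fs_entry bmp_entry = { .offset = 8 << 20, .bytes = 4096,
                                      .bitmap = bits };

        printf("extent entry ends at %llu\n",
               (unsigned long long)entry_end(&ext_entry));
        printf("bitmap entry ends at %llu\n",
               (unsigned long long)entry_end(&bmp_entry));
        return 0;
}
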
+1 -1
fs/btrfs/inode.c
··· 2603 2603 if (root->ref_cows) 2604 2604 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2605 2605 path = btrfs_alloc_path(); 2606 - path->reada = -1; 2607 2606 BUG_ON(!path); 2607 + path->reada = -1; 2608 2608 2609 2609 /* FIXME, add redo link to tree so we don't leak on crash */ 2610 2610 key.objectid = inode->i_ino;
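The inode.c hunk is purely an ordering fix: check the result of btrfs_alloc_path() before writing to it, so an allocation failure trips the BUG_ON() instead of dereferencing NULL. The same allocate/check/use pattern in a self-contained form (the names here are made up for the example):

#include <assert.h>
#include <stdlib.h>

struct toy_path { int reada; };

static struct toy_path *alloc_toy_path(void)
{
        struct toy_path *path = calloc(1, sizeof(*path));

        assert(path != NULL);   /* verify the allocation first ...      */
        path->reada = -1;       /* ... and only then touch its members  */
        return path;
}

int main(void)
{
        free(alloc_toy_path());
        return 0;
}
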
+3 -3
fs/btrfs/print-tree.c
··· 309 309 } 310 310 printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", 311 311 (unsigned long long)btrfs_header_bytenr(c), 312 - btrfs_header_level(c), nr, 312 + level, nr, 313 313 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); 314 314 for (i = 0; i < nr; i++) { 315 315 btrfs_node_key_to_cpu(c, &key, i); ··· 326 326 btrfs_level_size(root, level - 1), 327 327 btrfs_node_ptr_generation(c, i)); 328 328 if (btrfs_is_leaf(next) && 329 - btrfs_header_level(c) != 1) 329 + level != 1) 330 330 BUG(); 331 331 if (btrfs_header_level(next) != 332 - btrfs_header_level(c) - 1) 332 + level - 1) 333 333 BUG(); 334 334 btrfs_print_tree(root, next); 335 335 free_extent_buffer(next);
+3
fs/btrfs/relocation.c
··· 670 670 err = ret; 671 671 goto out; 672 672 } 673 + if (ret > 0 && path2->slots[level] > 0) 674 + path2->slots[level]--; 673 675 674 676 eb = path2->nodes[level]; 675 677 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != ··· 1611 1609 BUG_ON(level == 0); 1612 1610 path->lowest_level = level; 1613 1611 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 1612 + path->lowest_level = 0; 1614 1613 if (ret < 0) { 1615 1614 btrfs_free_path(path); 1616 1615 return ret;
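The first relocation.c hunk deals with the btrfs_search_slot() convention that a positive return means the exact key was not found and the slot points just past where it would live, so on a node the caller steps back one slot to reach the entry that covers the key; the second hunk restores path->lowest_level after the lookup. The slot convention, reduced to a plain sorted array (a sketch only, not kernel code):

#include <stdio.h>

/*
 * Return 0 and set *slot if 'key' is present; otherwise return 1 with
 * *slot pointing just past where it would be inserted -- the caller has
 * to step back one slot to get the entry covering 'key'.
 */
static int search_slot(const int *keys, int nr, int key, int *slot)
{
        int lo = 0, hi = nr;

        while (lo < hi) {
                int mid = lo + (hi - lo) / 2;

                if (keys[mid] == key) {
                        *slot = mid;
                        return 0;
                }
                if (keys[mid] < key)
                        lo = mid + 1;
                else
                        hi = mid;
        }
        *slot = lo;
        return 1;
}

int main(void)
{
        int keys[] = { 10, 40, 70, 100 };       /* first key of each child */
        int slot, ret;

        ret = search_slot(keys, 4, 55, &slot);
        if (ret > 0 && slot > 0)
                slot--;                 /* the child starting at 40 covers 55 */
        printf("ret=%d slot=%d\n", ret, slot);
        return 0;
}
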
+19 -21
fs/btrfs/transaction.c
··· 40 40 } 41 41 } 42 42 43 + static noinline void switch_commit_root(struct btrfs_root *root) 44 + { 45 + down_write(&root->commit_root_sem); 46 + free_extent_buffer(root->commit_root); 47 + root->commit_root = btrfs_root_node(root); 48 + up_write(&root->commit_root_sem); 49 + } 50 + 43 51 /* 44 52 * either allocate a new transaction or hop into the existing one 45 53 */ ··· 452 444 453 445 btrfs_write_dirty_block_groups(trans, root); 454 446 455 - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 456 - BUG_ON(ret); 457 - 458 447 while (1) { 459 448 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 460 449 if (old_root_bytenr == root->node->start) ··· 462 457 &root->root_key, 463 458 &root->root_item); 464 459 BUG_ON(ret); 465 - btrfs_write_dirty_block_groups(trans, root); 466 460 467 - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 461 + ret = btrfs_write_dirty_block_groups(trans, root); 468 462 BUG_ON(ret); 469 463 } 470 - free_extent_buffer(root->commit_root); 471 - root->commit_root = btrfs_root_node(root); 464 + switch_commit_root(root); 472 465 return 0; 473 466 } 474 467 ··· 498 495 root = list_entry(next, struct btrfs_root, dirty_list); 499 496 500 497 update_cowonly_root(trans, root); 501 - 502 - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 503 - BUG_ON(ret); 504 498 } 505 499 return 0; 506 500 } ··· 544 544 btrfs_update_reloc_root(trans, root); 545 545 546 546 if (root->commit_root != root->node) { 547 - free_extent_buffer(root->commit_root); 548 - root->commit_root = btrfs_root_node(root); 547 + switch_commit_root(root); 549 548 btrfs_set_root_node(&root->root_item, 550 549 root->node); 551 550 } ··· 942 943 943 944 mutex_unlock(&root->fs_info->trans_mutex); 944 945 945 - if (flush_on_commit || snap_pending) { 946 - if (flush_on_commit) 947 - btrfs_start_delalloc_inodes(root); 946 + if (flush_on_commit) { 947 + btrfs_start_delalloc_inodes(root); 948 + ret = btrfs_wait_ordered_extents(root, 0); 949 + BUG_ON(ret); 950 + } else if (snap_pending) { 948 951 ret = btrfs_wait_ordered_extents(root, 1); 949 952 BUG_ON(ret); 950 953 } ··· 1010 1009 1011 1010 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1012 1011 root->fs_info->tree_root->node); 1013 - free_extent_buffer(root->fs_info->tree_root->commit_root); 1014 - root->fs_info->tree_root->commit_root = 1015 - btrfs_root_node(root->fs_info->tree_root); 1012 + switch_commit_root(root->fs_info->tree_root); 1016 1013 1017 1014 btrfs_set_root_node(&root->fs_info->chunk_root->root_item, 1018 1015 root->fs_info->chunk_root->node); 1019 - free_extent_buffer(root->fs_info->chunk_root->commit_root); 1020 - root->fs_info->chunk_root->commit_root = 1021 - btrfs_root_node(root->fs_info->chunk_root); 1016 + switch_commit_root(root->fs_info->chunk_root); 1022 1017 1023 1018 update_super_roots(root); 1024 1019 ··· 1054 1057 cur_trans->commit_done = 1; 1055 1058 1056 1059 root->fs_info->last_trans_committed = cur_trans->transid; 1060 + 1057 1061 wake_up(&cur_trans->commit_wait); 1058 1062 1059 1063 put_transaction(cur_trans);
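The transaction.c changes concentrate the commit-root swap in switch_commit_root(), which drops the old reference and publishes the current root node under the write side of commit_root_sem, and give the flushoncommit path its own btrfs_wait_ordered_extents() call. The swap-a-refcounted-pointer-under-a-rwlock pattern in miniature, as a pthread-based sketch with invented names (the kernel uses a rw_semaphore and extent_buffer reference counts):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* toy refcounted buffer standing in for an extent_buffer */
struct buf {
        int refs;
        long bytenr;
};

static struct buf *buf_get(struct buf *b) { b->refs++; return b; }

static void buf_put(struct buf *b)
{
        if (--b->refs == 0)
                free(b);
}

struct toy_root {
        pthread_rwlock_t commit_sem;
        struct buf *node;         /* current root node        */
        struct buf *commit_root;  /* last committed root node */
};

/* mirrors the switch_commit_root() idea: the writer swaps under the lock */
static void switch_commit(struct toy_root *root)
{
        pthread_rwlock_wrlock(&root->commit_sem);
        buf_put(root->commit_root);
        root->commit_root = buf_get(root->node);
        pthread_rwlock_unlock(&root->commit_sem);
}

int main(void)
{
        struct toy_root root;
        struct buf *old_cr = malloc(sizeof(*old_cr));
        struct buf *node = malloc(sizeof(*node));

        old_cr->refs = 1; old_cr->bytenr = 1000;
        node->refs = 1; node->bytenr = 2000;

        pthread_rwlock_init(&root.commit_sem, NULL);
        root.commit_root = old_cr;
        root.node = node;

        switch_commit(&root);
        printf("commit root now at %ld\n", root.commit_root->bytenr);

        buf_put(root.commit_root);      /* drop the ref taken by the swap  */
        buf_put(root.node);             /* drop the original ref on 'node' */
        pthread_rwlock_destroy(&root.commit_sem);
        return 0;
}
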
+1 -1
fs/btrfs/tree-log.c
··· 797 797 return -ENOENT; 798 798 799 799 inode = read_one_inode(root, key->objectid); 800 - BUG_ON(!dir); 800 + BUG_ON(!inode); 801 801 802 802 ref_ptr = btrfs_item_ptr_offset(eb, slot); 803 803 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
+19 -27
fs/btrfs/volumes.c
··· 721 721 */ 722 722 static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, 723 723 struct btrfs_device *device, 724 - u64 num_bytes, u64 *start) 724 + u64 num_bytes, u64 *start, 725 + u64 *max_avail) 725 726 { 726 727 struct btrfs_key key; 727 728 struct btrfs_root *root = device->dev_root; ··· 759 758 ret = btrfs_search_slot(trans, root, &key, path, 0, 0); 760 759 if (ret < 0) 761 760 goto error; 762 - ret = btrfs_previous_item(root, path, 0, key.type); 763 - if (ret < 0) 764 - goto error; 761 + if (ret > 0) { 762 + ret = btrfs_previous_item(root, path, key.objectid, key.type); 763 + if (ret < 0) 764 + goto error; 765 + if (ret > 0) 766 + start_found = 1; 767 + } 765 768 l = path->nodes[0]; 766 769 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 767 770 while (1) { ··· 808 803 if (last_byte < search_start) 809 804 last_byte = search_start; 810 805 hole_size = key.offset - last_byte; 806 + 807 + if (hole_size > *max_avail) 808 + *max_avail = hole_size; 809 + 811 810 if (key.offset > last_byte && 812 811 hole_size >= num_bytes) { 813 812 *start = last_byte; ··· 1630 1621 device->fs_devices->total_rw_bytes += diff; 1631 1622 1632 1623 device->total_bytes = new_size; 1624 + device->disk_total_bytes = new_size; 1633 1625 btrfs_clear_space_info_full(device->dev_root->fs_info); 1634 1626 1635 1627 return btrfs_update_device(trans, device); ··· 2017 2007 goto done; 2018 2008 if (ret) { 2019 2009 ret = 0; 2020 - goto done; 2010 + break; 2021 2011 } 2022 2012 2023 2013 l = path->nodes[0]; ··· 2025 2015 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2026 2016 2027 2017 if (key.objectid != device->devid) 2028 - goto done; 2018 + break; 2029 2019 2030 2020 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 2031 2021 length = btrfs_dev_extent_length(l, dev_extent); ··· 2181 2171 max_chunk_size); 2182 2172 2183 2173 again: 2174 + max_avail = 0; 2184 2175 if (!map || map->num_stripes != num_stripes) { 2185 2176 kfree(map); 2186 2177 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); ··· 2230 2219 2231 2220 if (device->in_fs_metadata && avail >= min_free) { 2232 2221 ret = find_free_dev_extent(trans, device, 2233 - min_free, &dev_offset); 2222 + min_free, &dev_offset, 2223 + &max_avail); 2234 2224 if (ret == 0) { 2235 2225 list_move_tail(&device->dev_alloc_list, 2236 2226 &private_devs); ··· 2805 2793 WARN_ON(nr >= map->num_stripes); 2806 2794 buf[nr++] = bytenr; 2807 2795 } 2808 - } 2809 - 2810 - for (i = 0; i > nr; i++) { 2811 - struct btrfs_multi_bio *multi; 2812 - struct btrfs_bio_stripe *stripe; 2813 - int ret; 2814 - 2815 - length = 1; 2816 - ret = btrfs_map_block(map_tree, WRITE, buf[i], 2817 - &length, &multi, 0); 2818 - BUG_ON(ret); 2819 - 2820 - stripe = multi->stripes; 2821 - for (j = 0; j < multi->num_stripes; j++) { 2822 - if (stripe->physical >= physical && 2823 - physical < stripe->physical + length) 2824 - break; 2825 - } 2826 - BUG_ON(j >= multi->num_stripes); 2827 - kfree(multi); 2828 2796 } 2829 2797 2830 2798 *logical = buf;
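find_free_dev_extent() above now also reports, via *max_avail, the largest hole it walked past even when none satisfied the request, giving the chunk-allocation retry loop something to size its next attempt against. Reduced to a sorted array of allocated ranges, the hole walk looks roughly like this (illustrative names and layout only, not the on-disk dev-extent format):

#include <stdint.h>
#include <stdio.h>

struct alloc_range { uint64_t start, len; };    /* allocated ranges, sorted */

/*
 * Scan the gaps between allocated ranges for the first hole of at least
 * 'need' bytes.  Whether or not one is found, *max_avail is left holding
 * the largest hole encountered so the caller can retry with less.
 * Returns 0 and sets *found on success, -1 otherwise.
 */
static int find_hole(const struct alloc_range *ext, int nr, uint64_t dev_size,
                     uint64_t need, uint64_t *found, uint64_t *max_avail)
{
        uint64_t last_end = 0;
        int i;

        *max_avail = 0;
        for (i = 0; i <= nr; i++) {
                uint64_t next = (i < nr) ? ext[i].start : dev_size;
                uint64_t hole = next - last_end;

                if (hole > *max_avail)
                        *max_avail = hole;
                if (hole >= need) {
                        *found = last_end;
                        return 0;
                }
                if (i < nr)
                        last_end = ext[i].start + ext[i].len;
        }
        return -1;
}

int main(void)
{
        struct alloc_range ext[] = { { 0, 4096 }, { 8192, 4096 } };
        uint64_t found, max_avail;

        if (find_hole(ext, 2, 1 << 20, 6000, &found, &max_avail) == 0)
                printf("hole at %llu, biggest seen %llu\n",
                       (unsigned long long)found,
                       (unsigned long long)max_avail);
        return 0;
}
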