Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (37 commits)
Btrfs: Make sure dir is non-null before doing S_ISGID checks
Btrfs: Fix memory leak in cache_drop_leaf_ref
Btrfs: don't return congestion in write_cache_pages as often
Btrfs: Only prep for btree deletion balances when nodes are mostly empty
Btrfs: fix btrfs_unlock_up_safe to walk the entire path
Btrfs: change btrfs_del_leaf to drop locks earlier
Btrfs: Change btrfs_truncate_inode_items to stop when it hits the inode
Btrfs: Don't try to compress pages past i_size
Btrfs: join the transaction in __btrfs_setxattr
Btrfs: Handle SGID bit when creating inodes
Btrfs: Make btrfs_drop_snapshot work in larger and more efficient chunks
Btrfs: Change btree locking to use explicit blocking points
Btrfs: hash_lock is no longer needed
Btrfs: disable leak debugging checks in extent_io.c
Btrfs: sort references by byte number during btrfs_inc_ref
Btrfs: async threads should try harder to find work
Btrfs: selinux support
Btrfs: make btrfs acls selectable
Btrfs: Catch missed bios in the async bio submission thread
Btrfs: fix readdir on 32 bit machines
...

+1425 -455

MAINTAINERS | +8

@@ -1021,6 +1021,14 @@
 W:	http://bu3sch.de/btgpio.php
 S:	Maintained
 
+BTRFS FILE SYSTEM
+P:	Chris Mason
+M:	chris.mason@oracle.com
+L:	linux-btrfs@vger.kernel.org
+W:	http://btrfs.wiki.kernel.org/
+T:	git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
+S:	Maintained
+
 BTTV VIDEO4LINUX DRIVER
 P:	Mauro Carvalho Chehab
 M:	mchehab@infradead.org

fs/btrfs/Kconfig | +13

@@ -16,3 +16,16 @@
 	  module will be called btrfs.
 
 	  If unsure, say N.
+
+config BTRFS_FS_POSIX_ACL
+	bool "Btrfs POSIX Access Control Lists"
+	depends on BTRFS_FS
+	select FS_POSIX_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
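
The new symbol is consumed like any other Kconfig option: C code tests the generated CONFIG_BTRFS_FS_POSIX_ACL macro and falls back to a stub when ACL support is compiled out. A minimal sketch of that convention; the function name below is illustrative, not taken from the btrfs sources:

/* illustrative header fragment; btrfs_example_check_acl is a made-up name */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
int btrfs_example_check_acl(struct inode *inode, int mask);
#else
static inline int btrfs_example_check_acl(struct inode *inode, int mask)
{
	return 0;	/* ACLs compiled out: nothing extra to check */
}
#endif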

fs/btrfs/async-thread.c | +53 -8

@@ -16,11 +16,11 @@
  * Boston, MA 021110-1307, USA.
  */
 
-#include <linux/version.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
-# include <linux/freezer.h>
+#include <linux/freezer.h>
+#include <linux/ftrace.h>
 #include "async-thread.h"
 
 #define WORK_QUEUED_BIT 0
@@ -143,6 +143,7 @@
 	struct btrfs_work *work;
 	do {
 		spin_lock_irq(&worker->lock);
+again_locked:
 		while (!list_empty(&worker->pending)) {
 			cur = worker->pending.next;
 			work = list_entry(cur, struct btrfs_work, list);
@@ -166,14 +165,50 @@
 			check_idle_worker(worker);
 
 		}
-		worker->working = 0;
 		if (freezing(current)) {
+			worker->working = 0;
+			spin_unlock_irq(&worker->lock);
 			refrigerator();
 		} else {
-			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(&worker->lock);
-			if (!kthread_should_stop())
+			if (!kthread_should_stop()) {
+				cpu_relax();
+				/*
+				 * we've dropped the lock, did someone else
+				 * jump_in?
+				 */
+				smp_mb();
+				if (!list_empty(&worker->pending))
+					continue;
+
+				/*
+				 * this short schedule allows more work to
+				 * come in without the queue functions
+				 * needing to go through wake_up_process()
+				 *
+				 * worker->working is still 1, so nobody
+				 * is going to try and wake us up
+				 */
+				schedule_timeout(1);
+				smp_mb();
+				if (!list_empty(&worker->pending))
+					continue;
+
+				/* still no more work?, sleep for real */
+				spin_lock_irq(&worker->lock);
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (!list_empty(&worker->pending))
+					goto again_locked;
+
+				/*
+				 * this makes sure we get a wakeup when someone
+				 * adds something new to the queue
+				 */
+				worker->working = 0;
+				spin_unlock_irq(&worker->lock);
+
 				schedule();
+			}
 			__set_current_state(TASK_RUNNING);
 		}
 	} while (!kthread_should_stop());
@@ -387,13 +350,14 @@
 {
 	struct btrfs_worker_thread *worker = work->worker;
 	unsigned long flags;
+	int wake = 0;
 
 	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
 		goto out;
 
 	spin_lock_irqsave(&worker->lock, flags);
-	atomic_inc(&worker->num_pending);
 	list_add_tail(&work->list, &worker->pending);
+	atomic_inc(&worker->num_pending);
 
 	/* by definition we're busy, take ourselves off the idle
 	 * list
@@ -406,10 +368,16 @@
 			       &worker->workers->worker_list);
 		spin_unlock_irqrestore(&worker->workers->lock, flags);
 	}
+	if (!worker->working) {
+		wake = 1;
+		worker->working = 1;
+	}
 
 	spin_unlock_irqrestore(&worker->lock, flags);
-
+	if (wake)
+		wake_up_process(worker->task);
 out:
+
 	return 0;
 }
 
@@ -442,9 +398,10 @@
 	}
 
 	spin_lock_irqsave(&worker->lock, flags);
+
+	list_add_tail(&work->list, &worker->pending);
 	atomic_inc(&worker->num_pending);
 	check_busy_worker(worker);
-	list_add_tail(&work->list, &worker->pending);
 
 	/*
 	 * avoid calling into wake_up_process if this thread has already
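
The worker-loop rework above is about avoiding lost wakeups while still letting an idle thread linger briefly: the thread only clears worker->working and sleeps after re-checking the pending list with the lock held and TASK_INTERRUPTIBLE already set, and queuers call wake_up_process() only when they flip working from 0 to 1. A simplified, generic sketch of that ordering, using made-up demo_* names rather than the btrfs structures (worker->working is assumed to start at 1 while the thread runs):

struct demo_worker {
	spinlock_t lock;
	struct list_head pending;
	int working;			/* 1 while the thread is awake */
	struct task_struct *task;
};

static void demo_queue(struct demo_worker *w, struct list_head *item)
{
	int wake = 0;

	spin_lock_irq(&w->lock);
	list_add_tail(item, &w->pending);
	if (!w->working) {		/* only wake a worker that went idle */
		wake = 1;
		w->working = 1;
	}
	spin_unlock_irq(&w->lock);
	if (wake)
		wake_up_process(w->task);
}

static int demo_loop(void *arg)
{
	struct demo_worker *w = arg;

	while (!kthread_should_stop()) {
		spin_lock_irq(&w->lock);
		while (!list_empty(&w->pending)) {
			/* a real worker would drop the lock to run the item */
			list_del_init(w->pending.next);
		}
		/*
		 * mark ourselves idle and re-check before sleeping, so a
		 * queuer that raced with us either sees working == 0 (and
		 * wakes us) or we see its item and keep going
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (!list_empty(&w->pending)) {
			__set_current_state(TASK_RUNNING);
			spin_unlock_irq(&w->lock);
			continue;
		}
		w->working = 0;
		spin_unlock_irq(&w->lock);
		schedule();
		__set_current_state(TASK_RUNNING);
	}
	return 0;
}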

fs/btrfs/compression.c | -1

@@ -32,7 +32,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
-#include <linux/version.h>
 #include <linux/pagevec.h>
 #include "compat.h"
 #include "ctree.h"

fs/btrfs/ctree.c | +246 -34
··· 54 return path; 55 } 56 57 /* this also releases the path */ 58 void btrfs_free_path(struct btrfs_path *p) 59 { ··· 297 if (IS_ERR(cow)) 298 return PTR_ERR(cow); 299 300 copy_extent_buffer(cow, buf, 0, 0, cow->len); 301 btrfs_set_header_bytenr(cow, cow->start); 302 btrfs_set_header_generation(cow, trans->transid); ··· 415 WARN_ON(1); 416 } 417 418 - spin_lock(&root->fs_info->hash_lock); 419 if (btrfs_header_generation(buf) == trans->transid && 420 btrfs_header_owner(buf) == root->root_key.objectid && 421 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 422 *cow_ret = buf; 423 - spin_unlock(&root->fs_info->hash_lock); 424 WARN_ON(prealloc_dest); 425 return 0; 426 } 427 - spin_unlock(&root->fs_info->hash_lock); 428 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 429 ret = __btrfs_cow_block(trans, root, buf, parent, 430 parent_slot, cow_ret, search_start, 0, 431 prealloc_dest); ··· 534 if (parent_nritems == 1) 535 return 0; 536 537 for (i = start_slot; i < end_slot; i++) { 538 int close = 1; 539 ··· 596 search_start = last_block; 597 598 btrfs_tree_lock(cur); 599 err = __btrfs_cow_block(trans, root, cur, parent, i, 600 &cur, search_start, 601 min(16 * blocksize, ··· 895 return 0; 896 897 mid = path->nodes[level]; 898 WARN_ON(!path->locks[level]); 899 WARN_ON(btrfs_header_generation(mid) != trans->transid); 900 ··· 918 /* promote the child to a root */ 919 child = read_node_slot(root, mid, 0); 920 btrfs_tree_lock(child); 921 BUG_ON(!child); 922 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); 923 BUG_ON(ret); ··· 935 936 add_root_to_dirty_list(root); 937 btrfs_tree_unlock(child); 938 path->locks[level] = 0; 939 path->nodes[level] = NULL; 940 clean_tree_block(trans, root, mid); ··· 960 left = read_node_slot(root, parent, pslot - 1); 961 if (left) { 962 btrfs_tree_lock(left); 963 wret = btrfs_cow_block(trans, root, left, 964 parent, pslot - 1, &left, 0); 965 if (wret) { ··· 971 right = read_node_slot(root, parent, pslot + 1); 972 if (right) { 973 btrfs_tree_lock(right); 974 wret = btrfs_cow_block(trans, root, right, 975 parent, pslot + 1, &right, 0); 976 if (wret) { ··· 1147 u32 left_nr; 1148 1149 btrfs_tree_lock(left); 1150 left_nr = btrfs_header_nritems(left); 1151 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1152 wret = 1; ··· 1195 */ 1196 if (right) { 1197 u32 right_nr; 1198 btrfs_tree_lock(right); 1199 right_nr = btrfs_header_nritems(right); 1200 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1201 wret = 1; ··· 1253 struct btrfs_disk_key disk_key; 1254 u32 nritems; 1255 u64 search; 1256 - u64 lowest_read; 1257 - u64 highest_read; 1258 u64 nread = 0; 1259 int direction = path->reada; 1260 struct extent_buffer *eb; ··· 1277 return; 1278 } 1279 1280 - highest_read = search; 1281 - lowest_read = search; 1282 1283 nritems = btrfs_header_nritems(node); 1284 nr = slot; ··· 1297 break; 1298 } 1299 search = btrfs_node_blockptr(node, nr); 1300 - if ((search >= lowest_read && search <= highest_read) || 1301 - (search < lowest_read && lowest_read - search <= 16384) || 1302 - (search > highest_read && search - highest_read <= 16384)) { 1303 readahead_tree_block(root, search, blocksize, 1304 btrfs_node_ptr_generation(node, nr)); 1305 nread += blocksize; 1306 } 1307 nscan++; 1308 - if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) 1309 break; 1310 - 1311 - if (nread > (256 * 1024) || nscan > 128) 1312 - break; 1313 - 1314 - if (search < lowest_read) 1315 - lowest_read = search; 1316 - if (search > highest_read) 1317 - highest_read = search; 1318 
} 1319 } 1320 1321 /* 1322 * when we walk down the tree, it is usually safe to unlock the higher layers ··· 1418 btrfs_tree_unlock(t); 1419 path->locks[i] = 0; 1420 } 1421 } 1422 } 1423 ··· 1507 int wret; 1508 1509 /* is a cow on this block not required */ 1510 - spin_lock(&root->fs_info->hash_lock); 1511 if (btrfs_header_generation(b) == trans->transid && 1512 btrfs_header_owner(b) == root->root_key.objectid && 1513 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { 1514 - spin_unlock(&root->fs_info->hash_lock); 1515 goto cow_done; 1516 } 1517 - spin_unlock(&root->fs_info->hash_lock); 1518 1519 /* ok, we have to cow, is our old prealloc the right 1520 * size? 1521 */ 1522 if (prealloc_block.objectid && 1523 prealloc_block.offset != b->len) { 1524 btrfs_free_reserved_extent(root, 1525 prealloc_block.objectid, 1526 prealloc_block.offset); 1527 prealloc_block.objectid = 0; 1528 } 1529 1530 /* 1531 * for higher level blocks, try not to allocate blocks 1532 * with the block and the parent locks held. 1533 */ 1534 - if (level > 1 && !prealloc_block.objectid && 1535 btrfs_path_lock_waiting(p, level)) { 1536 u32 size = b->len; 1537 u64 hint = b->start; ··· 1543 BUG_ON(ret); 1544 goto again; 1545 } 1546 1547 wret = btrfs_cow_block(trans, root, b, 1548 p->nodes[level + 1], ··· 1567 if (!p->skip_locking) 1568 p->locks[level] = 1; 1569 1570 ret = check_block(root, p, level); 1571 if (ret) { 1572 ret = -1; ··· 1590 } 1591 1592 ret = bin_search(b, key, level, &slot); 1593 if (level != 0) { 1594 if (ret && slot > 0) 1595 slot -= 1; ··· 1598 if ((p->search_for_split || ins_len > 0) && 1599 btrfs_header_nritems(b) >= 1600 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1601 - int sret = split_node(trans, root, p, level); 1602 BUG_ON(sret > 0); 1603 if (sret) { 1604 ret = sret; ··· 1615 } 1616 b = p->nodes[level]; 1617 slot = p->slots[level]; 1618 - } else if (ins_len < 0) { 1619 - int sret = balance_level(trans, root, p, 1620 - level); 1621 if (sret) { 1622 ret = sret; 1623 goto done; ··· 1661 * of the btree by dropping locks before 1662 * we read. 
1663 */ 1664 - if (level > 1) { 1665 btrfs_release_path(NULL, p); 1666 if (tmp) 1667 free_extent_buffer(tmp); ··· 1676 free_extent_buffer(tmp); 1677 goto again; 1678 } else { 1679 if (tmp) 1680 free_extent_buffer(tmp); 1681 if (should_reada) ··· 1686 b = read_node_slot(root, b, slot); 1687 } 1688 } 1689 - if (!p->skip_locking) 1690 - btrfs_tree_lock(b); 1691 } else { 1692 p->slots[level] = slot; 1693 if (ins_len > 0 && 1694 btrfs_leaf_free_space(root, b) < ins_len) { 1695 - int sret = split_leaf(trans, root, key, 1696 p, ins_len, ret == 0); 1697 BUG_ON(sret > 0); 1698 if (sret) { 1699 ret = sret; ··· 1722 } 1723 ret = 1; 1724 done: 1725 if (prealloc_block.objectid) { 1726 btrfs_free_reserved_extent(root, 1727 prealloc_block.objectid, 1728 prealloc_block.offset); 1729 } 1730 - 1731 return ret; 1732 } 1733 ··· 1754 eb = btrfs_lock_root_node(root); 1755 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); 1756 BUG_ON(ret); 1757 1758 parent = eb; 1759 while (1) { ··· 1781 eb = read_tree_block(root, bytenr, blocksize, 1782 generation); 1783 btrfs_tree_lock(eb); 1784 } 1785 1786 /* ··· 1806 eb = read_tree_block(root, bytenr, blocksize, 1807 generation); 1808 btrfs_tree_lock(eb); 1809 } 1810 1811 ret = btrfs_cow_block(trans, root, eb, parent, slot, ··· 2353 2354 right = read_node_slot(root, upper, slot + 1); 2355 btrfs_tree_lock(right); 2356 free_space = btrfs_leaf_free_space(root, right); 2357 if (free_space < data_size) 2358 goto out_unlock; ··· 2550 2551 left = read_node_slot(root, path->nodes[1], slot - 1); 2552 btrfs_tree_lock(left); 2553 free_space = btrfs_leaf_free_space(root, left); 2554 if (free_space < data_size) { 2555 ret = 1; ··· 3009 sizeof(struct btrfs_item), 1); 3010 path->keep_locks = 0; 3011 BUG_ON(ret); 3012 3013 leaf = path->nodes[0]; 3014 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); ··· 3545 BUG(); 3546 } 3547 out: 3548 return ret; 3549 } 3550 ··· 3633 { 3634 int ret; 3635 u64 root_gen = btrfs_header_generation(path->nodes[1]); 3636 3637 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3638 if (ret) 3639 return ret; 3640 3641 ret = btrfs_free_extent(trans, root, bytenr, 3642 btrfs_level_size(root, 0), 3643 - path->nodes[1]->start, 3644 - btrfs_header_owner(path->nodes[1]), 3645 root_gen, 0, 1); 3646 return ret; 3647 } ··· 3920 */ 3921 if (slot >= nritems) { 3922 path->slots[level] = slot; 3923 sret = btrfs_find_next_key(root, path, min_key, level, 3924 cache_only, min_trans); 3925 if (sret == 0) { 3926 btrfs_release_path(root, path); 3927 goto again; 3928 } else { 3929 goto out; 3930 } 3931 } ··· 3939 unlock_up(path, level, 1); 3940 goto out; 3941 } 3942 cur = read_node_slot(root, cur, slot); 3943 3944 btrfs_tree_lock(cur); 3945 path->locks[level - 1] = 1; 3946 path->nodes[level - 1] = cur; 3947 unlock_up(path, level, 1); 3948 } 3949 out: 3950 if (ret == 0) 3951 memcpy(min_key, &found_key, sizeof(found_key)); 3952 return ret; 3953 } 3954 ··· 4048 if (ret < 0) 4049 return ret; 4050 4051 nritems = btrfs_header_nritems(path->nodes[0]); 4052 /* 4053 * by releasing the path above we dropped all our locks. 
A balance ··· 4079 free_extent_buffer(next); 4080 } 4081 4082 if (level == 1 && (path->locks[1] || path->skip_locking) && 4083 path->reada) 4084 reada_for_search(root, path, level, slot, 0); ··· 4088 if (!path->skip_locking) { 4089 WARN_ON(!btrfs_tree_locked(c)); 4090 btrfs_tree_lock(next); 4091 } 4092 break; 4093 } ··· 4105 path->locks[level] = 1; 4106 if (!level) 4107 break; 4108 if (level == 1 && path->locks[1] && path->reada) 4109 reada_for_search(root, path, level, slot, 0); 4110 next = read_node_slot(root, next, 0); 4111 if (!path->skip_locking) { 4112 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 4113 btrfs_tree_lock(next); 4114 } 4115 } 4116 done: ··· 4138 4139 while (1) { 4140 if (path->slots[0] == 0) { 4141 ret = btrfs_prev_leaf(root, path); 4142 if (ret != 0) 4143 return ret;
··· 54 return path; 55 } 56 57 + /* 58 + * set all locked nodes in the path to blocking locks. This should 59 + * be done before scheduling 60 + */ 61 + noinline void btrfs_set_path_blocking(struct btrfs_path *p) 62 + { 63 + int i; 64 + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 65 + if (p->nodes[i] && p->locks[i]) 66 + btrfs_set_lock_blocking(p->nodes[i]); 67 + } 68 + } 69 + 70 + /* 71 + * reset all the locked nodes in the patch to spinning locks. 72 + */ 73 + noinline void btrfs_clear_path_blocking(struct btrfs_path *p) 74 + { 75 + int i; 76 + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 77 + if (p->nodes[i] && p->locks[i]) 78 + btrfs_clear_lock_blocking(p->nodes[i]); 79 + } 80 + } 81 + 82 /* this also releases the path */ 83 void btrfs_free_path(struct btrfs_path *p) 84 { ··· 272 if (IS_ERR(cow)) 273 return PTR_ERR(cow); 274 275 + /* cow is set to blocking by btrfs_init_new_buffer */ 276 + 277 copy_extent_buffer(cow, buf, 0, 0, cow->len); 278 btrfs_set_header_bytenr(cow, cow->start); 279 btrfs_set_header_generation(cow, trans->transid); ··· 388 WARN_ON(1); 389 } 390 391 if (btrfs_header_generation(buf) == trans->transid && 392 btrfs_header_owner(buf) == root->root_key.objectid && 393 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 394 *cow_ret = buf; 395 WARN_ON(prealloc_dest); 396 return 0; 397 } 398 + 399 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 400 + 401 + if (parent) 402 + btrfs_set_lock_blocking(parent); 403 + btrfs_set_lock_blocking(buf); 404 + 405 ret = __btrfs_cow_block(trans, root, buf, parent, 406 parent_slot, cow_ret, search_start, 0, 407 prealloc_dest); ··· 504 if (parent_nritems == 1) 505 return 0; 506 507 + btrfs_set_lock_blocking(parent); 508 + 509 for (i = start_slot; i < end_slot; i++) { 510 int close = 1; 511 ··· 564 search_start = last_block; 565 566 btrfs_tree_lock(cur); 567 + btrfs_set_lock_blocking(cur); 568 err = __btrfs_cow_block(trans, root, cur, parent, i, 569 &cur, search_start, 570 min(16 * blocksize, ··· 862 return 0; 863 864 mid = path->nodes[level]; 865 + 866 WARN_ON(!path->locks[level]); 867 WARN_ON(btrfs_header_generation(mid) != trans->transid); 868 ··· 884 /* promote the child to a root */ 885 child = read_node_slot(root, mid, 0); 886 btrfs_tree_lock(child); 887 + btrfs_set_lock_blocking(child); 888 BUG_ON(!child); 889 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); 890 BUG_ON(ret); ··· 900 901 add_root_to_dirty_list(root); 902 btrfs_tree_unlock(child); 903 + 904 path->locks[level] = 0; 905 path->nodes[level] = NULL; 906 clean_tree_block(trans, root, mid); ··· 924 left = read_node_slot(root, parent, pslot - 1); 925 if (left) { 926 btrfs_tree_lock(left); 927 + btrfs_set_lock_blocking(left); 928 wret = btrfs_cow_block(trans, root, left, 929 parent, pslot - 1, &left, 0); 930 if (wret) { ··· 934 right = read_node_slot(root, parent, pslot + 1); 935 if (right) { 936 btrfs_tree_lock(right); 937 + btrfs_set_lock_blocking(right); 938 wret = btrfs_cow_block(trans, root, right, 939 parent, pslot + 1, &right, 0); 940 if (wret) { ··· 1109 u32 left_nr; 1110 1111 btrfs_tree_lock(left); 1112 + btrfs_set_lock_blocking(left); 1113 + 1114 left_nr = btrfs_header_nritems(left); 1115 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1116 wret = 1; ··· 1155 */ 1156 if (right) { 1157 u32 right_nr; 1158 + 1159 btrfs_tree_lock(right); 1160 + btrfs_set_lock_blocking(right); 1161 + 1162 right_nr = btrfs_header_nritems(right); 1163 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1164 wret = 1; ··· 1210 struct btrfs_disk_key disk_key; 1211 
u32 nritems; 1212 u64 search; 1213 + u64 target; 1214 u64 nread = 0; 1215 int direction = path->reada; 1216 struct extent_buffer *eb; ··· 1235 return; 1236 } 1237 1238 + target = search; 1239 1240 nritems = btrfs_header_nritems(node); 1241 nr = slot; ··· 1256 break; 1257 } 1258 search = btrfs_node_blockptr(node, nr); 1259 + if ((search <= target && target - search <= 65536) || 1260 + (search > target && search - target <= 65536)) { 1261 readahead_tree_block(root, search, blocksize, 1262 btrfs_node_ptr_generation(node, nr)); 1263 nread += blocksize; 1264 } 1265 nscan++; 1266 + if ((nread > 65536 || nscan > 32)) 1267 break; 1268 } 1269 } 1270 + 1271 + /* 1272 + * returns -EAGAIN if it had to drop the path, or zero if everything was in 1273 + * cache 1274 + */ 1275 + static noinline int reada_for_balance(struct btrfs_root *root, 1276 + struct btrfs_path *path, int level) 1277 + { 1278 + int slot; 1279 + int nritems; 1280 + struct extent_buffer *parent; 1281 + struct extent_buffer *eb; 1282 + u64 gen; 1283 + u64 block1 = 0; 1284 + u64 block2 = 0; 1285 + int ret = 0; 1286 + int blocksize; 1287 + 1288 + parent = path->nodes[level - 1]; 1289 + if (!parent) 1290 + return 0; 1291 + 1292 + nritems = btrfs_header_nritems(parent); 1293 + slot = path->slots[level]; 1294 + blocksize = btrfs_level_size(root, level); 1295 + 1296 + if (slot > 0) { 1297 + block1 = btrfs_node_blockptr(parent, slot - 1); 1298 + gen = btrfs_node_ptr_generation(parent, slot - 1); 1299 + eb = btrfs_find_tree_block(root, block1, blocksize); 1300 + if (eb && btrfs_buffer_uptodate(eb, gen)) 1301 + block1 = 0; 1302 + free_extent_buffer(eb); 1303 + } 1304 + if (slot < nritems) { 1305 + block2 = btrfs_node_blockptr(parent, slot + 1); 1306 + gen = btrfs_node_ptr_generation(parent, slot + 1); 1307 + eb = btrfs_find_tree_block(root, block2, blocksize); 1308 + if (eb && btrfs_buffer_uptodate(eb, gen)) 1309 + block2 = 0; 1310 + free_extent_buffer(eb); 1311 + } 1312 + if (block1 || block2) { 1313 + ret = -EAGAIN; 1314 + btrfs_release_path(root, path); 1315 + if (block1) 1316 + readahead_tree_block(root, block1, blocksize, 0); 1317 + if (block2) 1318 + readahead_tree_block(root, block2, blocksize, 0); 1319 + 1320 + if (block1) { 1321 + eb = read_tree_block(root, block1, blocksize, 0); 1322 + free_extent_buffer(eb); 1323 + } 1324 + if (block1) { 1325 + eb = read_tree_block(root, block2, blocksize, 0); 1326 + free_extent_buffer(eb); 1327 + } 1328 + } 1329 + return ret; 1330 + } 1331 + 1332 1333 /* 1334 * when we walk down the tree, it is usually safe to unlock the higher layers ··· 1324 btrfs_tree_unlock(t); 1325 path->locks[i] = 0; 1326 } 1327 + } 1328 + } 1329 + 1330 + /* 1331 + * This releases any locks held in the path starting at level and 1332 + * going all the way up to the root. 1333 + * 1334 + * btrfs_search_slot will keep the lock held on higher nodes in a few 1335 + * corner cases, such as COW of the block at slot zero in the node. This 1336 + * ignores those rules, and it should only be called when there are no 1337 + * more updates to be done higher up in the tree. 
1338 + */ 1339 + noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) 1340 + { 1341 + int i; 1342 + 1343 + if (path->keep_locks || path->lowest_level) 1344 + return; 1345 + 1346 + for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1347 + if (!path->nodes[i]) 1348 + continue; 1349 + if (!path->locks[i]) 1350 + continue; 1351 + btrfs_tree_unlock(path->nodes[i]); 1352 + path->locks[i] = 0; 1353 } 1354 } 1355 ··· 1387 int wret; 1388 1389 /* is a cow on this block not required */ 1390 if (btrfs_header_generation(b) == trans->transid && 1391 btrfs_header_owner(b) == root->root_key.objectid && 1392 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { 1393 goto cow_done; 1394 } 1395 1396 /* ok, we have to cow, is our old prealloc the right 1397 * size? 1398 */ 1399 if (prealloc_block.objectid && 1400 prealloc_block.offset != b->len) { 1401 + btrfs_release_path(root, p); 1402 btrfs_free_reserved_extent(root, 1403 prealloc_block.objectid, 1404 prealloc_block.offset); 1405 prealloc_block.objectid = 0; 1406 + goto again; 1407 } 1408 1409 /* 1410 * for higher level blocks, try not to allocate blocks 1411 * with the block and the parent locks held. 1412 */ 1413 + if (level > 0 && !prealloc_block.objectid && 1414 btrfs_path_lock_waiting(p, level)) { 1415 u32 size = b->len; 1416 u64 hint = b->start; ··· 1424 BUG_ON(ret); 1425 goto again; 1426 } 1427 + 1428 + btrfs_set_path_blocking(p); 1429 1430 wret = btrfs_cow_block(trans, root, b, 1431 p->nodes[level + 1], ··· 1446 if (!p->skip_locking) 1447 p->locks[level] = 1; 1448 1449 + btrfs_clear_path_blocking(p); 1450 + 1451 + /* 1452 + * we have a lock on b and as long as we aren't changing 1453 + * the tree, there is no way to for the items in b to change. 1454 + * It is safe to drop the lock on our parent before we 1455 + * go through the expensive btree search on b. 1456 + * 1457 + * If cow is true, then we might be changing slot zero, 1458 + * which may require changing the parent. So, we can't 1459 + * drop the lock until after we know which slot we're 1460 + * operating on. 1461 + */ 1462 + if (!cow) 1463 + btrfs_unlock_up_safe(p, level + 1); 1464 + 1465 ret = check_block(root, p, level); 1466 if (ret) { 1467 ret = -1; ··· 1453 } 1454 1455 ret = bin_search(b, key, level, &slot); 1456 + 1457 if (level != 0) { 1458 if (ret && slot > 0) 1459 slot -= 1; ··· 1460 if ((p->search_for_split || ins_len > 0) && 1461 btrfs_header_nritems(b) >= 1462 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1463 + int sret; 1464 + 1465 + sret = reada_for_balance(root, p, level); 1466 + if (sret) 1467 + goto again; 1468 + 1469 + btrfs_set_path_blocking(p); 1470 + sret = split_node(trans, root, p, level); 1471 + btrfs_clear_path_blocking(p); 1472 + 1473 BUG_ON(sret > 0); 1474 if (sret) { 1475 ret = sret; ··· 1468 } 1469 b = p->nodes[level]; 1470 slot = p->slots[level]; 1471 + } else if (ins_len < 0 && 1472 + btrfs_header_nritems(b) < 1473 + BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { 1474 + int sret; 1475 + 1476 + sret = reada_for_balance(root, p, level); 1477 + if (sret) 1478 + goto again; 1479 + 1480 + btrfs_set_path_blocking(p); 1481 + sret = balance_level(trans, root, p, level); 1482 + btrfs_clear_path_blocking(p); 1483 + 1484 if (sret) { 1485 ret = sret; 1486 goto done; ··· 1504 * of the btree by dropping locks before 1505 * we read. 
1506 */ 1507 + if (level > 0) { 1508 btrfs_release_path(NULL, p); 1509 if (tmp) 1510 free_extent_buffer(tmp); ··· 1519 free_extent_buffer(tmp); 1520 goto again; 1521 } else { 1522 + btrfs_set_path_blocking(p); 1523 if (tmp) 1524 free_extent_buffer(tmp); 1525 if (should_reada) ··· 1528 b = read_node_slot(root, b, slot); 1529 } 1530 } 1531 + if (!p->skip_locking) { 1532 + int lret; 1533 + 1534 + btrfs_clear_path_blocking(p); 1535 + lret = btrfs_try_spin_lock(b); 1536 + 1537 + if (!lret) { 1538 + btrfs_set_path_blocking(p); 1539 + btrfs_tree_lock(b); 1540 + btrfs_clear_path_blocking(p); 1541 + } 1542 + } 1543 } else { 1544 p->slots[level] = slot; 1545 if (ins_len > 0 && 1546 btrfs_leaf_free_space(root, b) < ins_len) { 1547 + int sret; 1548 + 1549 + btrfs_set_path_blocking(p); 1550 + sret = split_leaf(trans, root, key, 1551 p, ins_len, ret == 0); 1552 + btrfs_clear_path_blocking(p); 1553 + 1554 BUG_ON(sret > 0); 1555 if (sret) { 1556 ret = sret; ··· 1549 } 1550 ret = 1; 1551 done: 1552 + /* 1553 + * we don't really know what they plan on doing with the path 1554 + * from here on, so for now just mark it as blocking 1555 + */ 1556 + btrfs_set_path_blocking(p); 1557 if (prealloc_block.objectid) { 1558 btrfs_free_reserved_extent(root, 1559 prealloc_block.objectid, 1560 prealloc_block.offset); 1561 } 1562 return ret; 1563 } 1564 ··· 1577 eb = btrfs_lock_root_node(root); 1578 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); 1579 BUG_ON(ret); 1580 + 1581 + btrfs_set_lock_blocking(eb); 1582 1583 parent = eb; 1584 while (1) { ··· 1602 eb = read_tree_block(root, bytenr, blocksize, 1603 generation); 1604 btrfs_tree_lock(eb); 1605 + btrfs_set_lock_blocking(eb); 1606 } 1607 1608 /* ··· 1626 eb = read_tree_block(root, bytenr, blocksize, 1627 generation); 1628 btrfs_tree_lock(eb); 1629 + btrfs_set_lock_blocking(eb); 1630 } 1631 1632 ret = btrfs_cow_block(trans, root, eb, parent, slot, ··· 2172 2173 right = read_node_slot(root, upper, slot + 1); 2174 btrfs_tree_lock(right); 2175 + btrfs_set_lock_blocking(right); 2176 + 2177 free_space = btrfs_leaf_free_space(root, right); 2178 if (free_space < data_size) 2179 goto out_unlock; ··· 2367 2368 left = read_node_slot(root, path->nodes[1], slot - 1); 2369 btrfs_tree_lock(left); 2370 + btrfs_set_lock_blocking(left); 2371 + 2372 free_space = btrfs_leaf_free_space(root, left); 2373 if (free_space < data_size) { 2374 ret = 1; ··· 2824 sizeof(struct btrfs_item), 1); 2825 path->keep_locks = 0; 2826 BUG_ON(ret); 2827 + 2828 + /* 2829 + * make sure any changes to the path from split_leaf leave it 2830 + * in a blocking state 2831 + */ 2832 + btrfs_set_path_blocking(path); 2833 2834 leaf = path->nodes[0]; 2835 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); ··· 3354 BUG(); 3355 } 3356 out: 3357 + btrfs_unlock_up_safe(path, 1); 3358 return ret; 3359 } 3360 ··· 3441 { 3442 int ret; 3443 u64 root_gen = btrfs_header_generation(path->nodes[1]); 3444 + u64 parent_start = path->nodes[1]->start; 3445 + u64 parent_owner = btrfs_header_owner(path->nodes[1]); 3446 3447 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3448 if (ret) 3449 return ret; 3450 3451 + /* 3452 + * btrfs_free_extent is expensive, we want to make sure we 3453 + * aren't holding any locks when we call it 3454 + */ 3455 + btrfs_unlock_up_safe(path, 0); 3456 + 3457 ret = btrfs_free_extent(trans, root, bytenr, 3458 btrfs_level_size(root, 0), 3459 + parent_start, parent_owner, 3460 root_gen, 0, 1); 3461 return ret; 3462 } ··· 3721 */ 3722 if (slot >= nritems) { 3723 
path->slots[level] = slot; 3724 + btrfs_set_path_blocking(path); 3725 sret = btrfs_find_next_key(root, path, min_key, level, 3726 cache_only, min_trans); 3727 if (sret == 0) { 3728 btrfs_release_path(root, path); 3729 goto again; 3730 } else { 3731 + btrfs_clear_path_blocking(path); 3732 goto out; 3733 } 3734 } ··· 3738 unlock_up(path, level, 1); 3739 goto out; 3740 } 3741 + btrfs_set_path_blocking(path); 3742 cur = read_node_slot(root, cur, slot); 3743 3744 btrfs_tree_lock(cur); 3745 + 3746 path->locks[level - 1] = 1; 3747 path->nodes[level - 1] = cur; 3748 unlock_up(path, level, 1); 3749 + btrfs_clear_path_blocking(path); 3750 } 3751 out: 3752 if (ret == 0) 3753 memcpy(min_key, &found_key, sizeof(found_key)); 3754 + btrfs_set_path_blocking(path); 3755 return ret; 3756 } 3757 ··· 3843 if (ret < 0) 3844 return ret; 3845 3846 + btrfs_set_path_blocking(path); 3847 nritems = btrfs_header_nritems(path->nodes[0]); 3848 /* 3849 * by releasing the path above we dropped all our locks. A balance ··· 3873 free_extent_buffer(next); 3874 } 3875 3876 + /* the path was set to blocking above */ 3877 if (level == 1 && (path->locks[1] || path->skip_locking) && 3878 path->reada) 3879 reada_for_search(root, path, level, slot, 0); ··· 3881 if (!path->skip_locking) { 3882 WARN_ON(!btrfs_tree_locked(c)); 3883 btrfs_tree_lock(next); 3884 + btrfs_set_lock_blocking(next); 3885 } 3886 break; 3887 } ··· 3897 path->locks[level] = 1; 3898 if (!level) 3899 break; 3900 + 3901 + btrfs_set_path_blocking(path); 3902 if (level == 1 && path->locks[1] && path->reada) 3903 reada_for_search(root, path, level, slot, 0); 3904 next = read_node_slot(root, next, 0); 3905 if (!path->skip_locking) { 3906 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 3907 btrfs_tree_lock(next); 3908 + btrfs_set_lock_blocking(next); 3909 } 3910 } 3911 done: ··· 3927 3928 while (1) { 3929 if (path->slots[0] == 0) { 3930 + btrfs_set_path_blocking(path); 3931 ret = btrfs_prev_leaf(root, path); 3932 if (ret != 0) 3933 return ret;
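
The ctree.c changes above hinge on the new btrfs_set_path_blocking()/btrfs_clear_path_blocking() helpers: tree walking keeps the cheap spinning locks, and the path is only switched to blocking (sleepable) locks around operations that may schedule, such as COW, node splitting/balancing, or reading a block from disk. A hedged sketch of the calling pattern, mirroring what btrfs_search_slot does above; the helper in the middle is a hypothetical stand-in, not a btrfs API:

static int walk_and_modify(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path)
{
	int ret;

	/*
	 * locks taken while walking down the tree are spinning locks;
	 * before doing anything that can schedule (I/O, allocation, COW),
	 * convert every lock held in the path to a blocking lock
	 */
	btrfs_set_path_blocking(path);

	ret = do_something_that_sleeps(trans, root, path);	/* hypothetical */

	/* back to spinning locks for the rest of the short critical work */
	btrfs_clear_path_blocking(path);
	return ret;
}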

fs/btrfs/ctree.h | +13 -15

@@ -454,17 +454,11 @@
 	__le32 nsec;
 } __attribute__ ((__packed__));
 
-typedef enum {
+enum btrfs_compression_type {
 	BTRFS_COMPRESS_NONE = 0,
 	BTRFS_COMPRESS_ZLIB = 1,
 	BTRFS_COMPRESS_LAST = 2,
-} btrfs_compression_type;
-
-/* we don't understand any encryption methods right now */
-typedef enum {
-	BTRFS_ENCRYPTION_NONE = 0,
-	BTRFS_ENCRYPTION_LAST = 1,
-} btrfs_encryption_type;
+};
 
 struct btrfs_inode_item {
 	/* nfs style generation number */
@@ -695,9 +701,7 @@
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
 	wait_queue_head_t transaction_wait;
-
 	wait_queue_head_t async_submit_wait;
-	wait_queue_head_t tree_log_wait;
 
 	struct btrfs_super_block super_copy;
 	struct btrfs_super_block super_for_commit;
@@ -703,7 +711,6 @@
 	struct super_block *sb;
 	struct inode *btree_inode;
 	struct backing_dev_info bdi;
-	spinlock_t hash_lock;
 	struct mutex trans_mutex;
 	struct mutex tree_log_mutex;
 	struct mutex transaction_kthread_mutex;
@@ -721,10 +730,6 @@
 	atomic_t async_submit_draining;
 	atomic_t nr_async_bios;
 	atomic_t async_delalloc_pages;
-	atomic_t tree_log_writers;
-	atomic_t tree_log_commit;
-	unsigned long tree_log_batch;
-	u64 tree_log_transid;
 
 	/*
 	 * this is used by the balancing code to wait for all the pending
@@ -820,7 +833,14 @@
 	struct kobject root_kobj;
 	struct completion kobj_unregister;
 	struct mutex objectid_mutex;
+
 	struct mutex log_mutex;
+	wait_queue_head_t log_writer_wait;
+	wait_queue_head_t log_commit_wait[2];
+	atomic_t log_writers;
+	atomic_t log_commit[2];
+	unsigned long log_transid;
+	unsigned long log_batch;
 
 	u64 objectid;
 	u64 last_trans;
@@ -1835,6 +1841,10 @@
 struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_init_path(struct btrfs_path *p);
+void btrfs_set_path_blocking(struct btrfs_path *p);
+void btrfs_clear_path_blocking(struct btrfs_path *p);
+void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
+
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		    struct btrfs_path *path, int slot, int nr);
 int btrfs_del_leaf(struct btrfs_trans_handle *trans,
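
The per-root log fields replace the old fs_info-wide tree_log_* state, so each subvolume can run its own log commits. The log_writers counter paired with log_writer_wait is the classic drain pattern, and the two-element log_commit[]/log_commit_wait[] arrays presumably index on the low bit of log_transid so one commit can overlap the next transaction's writers. A hedged sketch of the drain side only; the function name is illustrative and the real logic lives in tree-log.c:

/* illustrative: wait for in-flight log writers before committing the log */
static void demo_wait_for_log_writers(struct btrfs_root *root)
{
	wait_event(root->log_writer_wait,
		   atomic_read(&root->log_writers) == 0);
}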

fs/btrfs/disk-io.c | +87 -33
··· 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 - #include <linux/version.h> 20 #include <linux/fs.h> 21 #include <linux/blkdev.h> 22 #include <linux/scatterlist.h> ··· 799 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 800 801 if (ret == 0) 802 - buf->flags |= EXTENT_UPTODATE; 803 else 804 WARN_ON(1); 805 return buf; ··· 813 if (btrfs_header_generation(buf) == 814 root->fs_info->running_transaction->transid) { 815 WARN_ON(!btrfs_tree_locked(buf)); 816 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 817 buf); 818 } ··· 853 spin_lock_init(&root->list_lock); 854 mutex_init(&root->objectid_mutex); 855 mutex_init(&root->log_mutex); 856 extent_io_tree_init(&root->dirty_log_pages, 857 fs_info->btree_inode->i_mapping, GFP_NOFS); 858 ··· 945 return 0; 946 } 947 948 - int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 949 - struct btrfs_fs_info *fs_info) 950 { 951 struct btrfs_root *root; 952 struct btrfs_root *tree_root = fs_info->tree_root; 953 954 root = kzalloc(sizeof(*root), GFP_NOFS); 955 if (!root) 956 - return -ENOMEM; 957 958 __setup_root(tree_root->nodesize, tree_root->leafsize, 959 tree_root->sectorsize, tree_root->stripesize, ··· 963 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; 964 root->root_key.type = BTRFS_ROOT_ITEM_KEY; 965 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; 966 root->ref_cows = 0; 967 968 - root->node = btrfs_alloc_free_block(trans, root, root->leafsize, 969 - 0, BTRFS_TREE_LOG_OBJECTID, 970 - trans->transid, 0, 0, 0); 971 972 btrfs_set_header_nritems(root->node, 0); 973 btrfs_set_header_level(root->node, 0); 974 btrfs_set_header_bytenr(root->node, root->node->start); ··· 991 BTRFS_FSID_SIZE); 992 btrfs_mark_buffer_dirty(root->node); 993 btrfs_tree_unlock(root->node); 994 - fs_info->log_root_tree = root; 995 return 0; 996 } 997 ··· 1200 { 1201 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1202 int ret = 0; 1203 - struct list_head *cur; 1204 struct btrfs_device *device; 1205 struct backing_dev_info *bdi; 1206 #if 0 ··· 1207 btrfs_congested_async(info, 0)) 1208 return 1; 1209 #endif 1210 - list_for_each(cur, &info->fs_devices->devices) { 1211 - device = list_entry(cur, struct btrfs_device, dev_list); 1212 if (!device->bdev) 1213 continue; 1214 bdi = blk_get_backing_dev_info(device->bdev); ··· 1225 */ 1226 static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) 1227 { 1228 - struct list_head *cur; 1229 struct btrfs_device *device; 1230 struct btrfs_fs_info *info; 1231 1232 info = (struct btrfs_fs_info *)bdi->unplug_io_data; 1233 - list_for_each(cur, &info->fs_devices->devices) { 1234 - device = list_entry(cur, struct btrfs_device, dev_list); 1235 if (!device->bdev) 1236 continue; 1237 ··· 1507 INIT_LIST_HEAD(&fs_info->dead_roots); 1508 INIT_LIST_HEAD(&fs_info->hashers); 1509 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1510 - spin_lock_init(&fs_info->hash_lock); 1511 spin_lock_init(&fs_info->delalloc_lock); 1512 spin_lock_init(&fs_info->new_trans_lock); 1513 spin_lock_init(&fs_info->ref_cache_lock); ··· 1594 init_waitqueue_head(&fs_info->transaction_throttle); 1595 init_waitqueue_head(&fs_info->transaction_wait); 1596 init_waitqueue_head(&fs_info->async_submit_wait); 1597 - init_waitqueue_head(&fs_info->tree_log_wait); 1598 - atomic_set(&fs_info->tree_log_commit, 0); 1599 - atomic_set(&fs_info->tree_log_writers, 0); 1600 - fs_info->tree_log_transid = 0; 1601 1602 __setup_root(4096, 4096, 4096, 4096, tree_root, 1603 fs_info, BTRFS_ROOT_TREE_OBJECTID); ··· 1682 * low idle thresh 
1683 */ 1684 fs_info->endio_workers.idle_thresh = 4; 1685 fs_info->endio_write_workers.idle_thresh = 64; 1686 fs_info->endio_meta_write_workers.idle_thresh = 64; 1687 ··· 1797 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; 1798 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1799 "btrfs-cleaner"); 1800 - if (!fs_info->cleaner_kthread) 1801 goto fail_csum_root; 1802 1803 fs_info->transaction_kthread = kthread_run(transaction_kthread, 1804 tree_root, 1805 "btrfs-transaction"); 1806 - if (!fs_info->transaction_kthread) 1807 goto fail_cleaner; 1808 1809 if (btrfs_super_log_root(disk_super) != 0) { ··· 1885 fail_iput: 1886 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 1887 iput(fs_info->btree_inode); 1888 - fail: 1889 btrfs_close_devices(fs_info->fs_devices); 1890 btrfs_mapping_tree_free(&fs_info->mapping_tree); 1891 1892 kfree(extent_root); 1893 kfree(tree_root); 1894 - bdi_destroy(&fs_info->bdi); 1895 kfree(fs_info); 1896 kfree(chunk_root); 1897 kfree(dev_root); ··· 2053 2054 int write_all_supers(struct btrfs_root *root, int max_mirrors) 2055 { 2056 - struct list_head *cur; 2057 struct list_head *head = &root->fs_info->fs_devices->devices; 2058 struct btrfs_device *dev; 2059 struct btrfs_super_block *sb; ··· 2068 2069 sb = &root->fs_info->super_for_commit; 2070 dev_item = &sb->dev_item; 2071 - list_for_each(cur, head) { 2072 - dev = list_entry(cur, struct btrfs_device, dev_list); 2073 if (!dev->bdev) { 2074 total_errors++; 2075 continue; ··· 2101 } 2102 2103 total_errors = 0; 2104 - list_for_each(cur, head) { 2105 - dev = list_entry(cur, struct btrfs_device, dev_list); 2106 if (!dev->bdev) 2107 continue; 2108 if (!dev->in_fs_metadata || !dev->writeable) ··· 2315 u64 transid = btrfs_header_generation(buf); 2316 struct inode *btree_inode = root->fs_info->btree_inode; 2317 2318 WARN_ON(!btrfs_tree_locked(buf)); 2319 if (transid != root->fs_info->generation) { 2320 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " ··· 2359 int ret; 2360 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 2361 if (ret == 0) 2362 - buf->flags |= EXTENT_UPTODATE; 2363 return ret; 2364 } 2365 2366 int btree_lock_page_hook(struct page *page) 2367 { 2368 struct inode *inode = page->mapping->host; 2369 - struct btrfs_root *root = BTRFS_I(inode)->root; 2370 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2371 struct extent_buffer *eb; 2372 unsigned long len; ··· 2380 goto out; 2381 2382 btrfs_tree_lock(eb); 2383 - spin_lock(&root->fs_info->hash_lock); 2384 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 2385 - spin_unlock(&root->fs_info->hash_lock); 2386 btrfs_tree_unlock(eb); 2387 free_extent_buffer(eb); 2388 out:
··· 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/fs.h> 20 #include <linux/blkdev.h> 21 #include <linux/scatterlist.h> ··· 800 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 801 802 if (ret == 0) 803 + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 804 else 805 WARN_ON(1); 806 return buf; ··· 814 if (btrfs_header_generation(buf) == 815 root->fs_info->running_transaction->transid) { 816 WARN_ON(!btrfs_tree_locked(buf)); 817 + 818 + /* ugh, clear_extent_buffer_dirty can be expensive */ 819 + btrfs_set_lock_blocking(buf); 820 + 821 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 822 buf); 823 } ··· 850 spin_lock_init(&root->list_lock); 851 mutex_init(&root->objectid_mutex); 852 mutex_init(&root->log_mutex); 853 + init_waitqueue_head(&root->log_writer_wait); 854 + init_waitqueue_head(&root->log_commit_wait[0]); 855 + init_waitqueue_head(&root->log_commit_wait[1]); 856 + atomic_set(&root->log_commit[0], 0); 857 + atomic_set(&root->log_commit[1], 0); 858 + atomic_set(&root->log_writers, 0); 859 + root->log_batch = 0; 860 + root->log_transid = 0; 861 extent_io_tree_init(&root->dirty_log_pages, 862 fs_info->btree_inode->i_mapping, GFP_NOFS); 863 ··· 934 return 0; 935 } 936 937 + static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, 938 + struct btrfs_fs_info *fs_info) 939 { 940 struct btrfs_root *root; 941 struct btrfs_root *tree_root = fs_info->tree_root; 942 + struct extent_buffer *leaf; 943 944 root = kzalloc(sizeof(*root), GFP_NOFS); 945 if (!root) 946 + return ERR_PTR(-ENOMEM); 947 948 __setup_root(tree_root->nodesize, tree_root->leafsize, 949 tree_root->sectorsize, tree_root->stripesize, ··· 951 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; 952 root->root_key.type = BTRFS_ROOT_ITEM_KEY; 953 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; 954 + /* 955 + * log trees do not get reference counted because they go away 956 + * before a real commit is actually done. They do store pointers 957 + * to file data extents, and those reference counts still get 958 + * updated (along with back refs to the log tree). 
959 + */ 960 root->ref_cows = 0; 961 962 + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 963 + 0, BTRFS_TREE_LOG_OBJECTID, 964 + trans->transid, 0, 0, 0); 965 + if (IS_ERR(leaf)) { 966 + kfree(root); 967 + return ERR_CAST(leaf); 968 + } 969 970 + root->node = leaf; 971 btrfs_set_header_nritems(root->node, 0); 972 btrfs_set_header_level(root->node, 0); 973 btrfs_set_header_bytenr(root->node, root->node->start); ··· 968 BTRFS_FSID_SIZE); 969 btrfs_mark_buffer_dirty(root->node); 970 btrfs_tree_unlock(root->node); 971 + return root; 972 + } 973 + 974 + int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 975 + struct btrfs_fs_info *fs_info) 976 + { 977 + struct btrfs_root *log_root; 978 + 979 + log_root = alloc_log_tree(trans, fs_info); 980 + if (IS_ERR(log_root)) 981 + return PTR_ERR(log_root); 982 + WARN_ON(fs_info->log_root_tree); 983 + fs_info->log_root_tree = log_root; 984 + return 0; 985 + } 986 + 987 + int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 988 + struct btrfs_root *root) 989 + { 990 + struct btrfs_root *log_root; 991 + struct btrfs_inode_item *inode_item; 992 + 993 + log_root = alloc_log_tree(trans, root->fs_info); 994 + if (IS_ERR(log_root)) 995 + return PTR_ERR(log_root); 996 + 997 + log_root->last_trans = trans->transid; 998 + log_root->root_key.offset = root->root_key.objectid; 999 + 1000 + inode_item = &log_root->root_item.inode; 1001 + inode_item->generation = cpu_to_le64(1); 1002 + inode_item->size = cpu_to_le64(3); 1003 + inode_item->nlink = cpu_to_le32(1); 1004 + inode_item->nbytes = cpu_to_le64(root->leafsize); 1005 + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1006 + 1007 + btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); 1008 + btrfs_set_root_generation(&log_root->root_item, trans->transid); 1009 + 1010 + WARN_ON(root->log_root); 1011 + root->log_root = log_root; 1012 + root->log_transid = 0; 1013 return 0; 1014 } 1015 ··· 1136 { 1137 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1138 int ret = 0; 1139 struct btrfs_device *device; 1140 struct backing_dev_info *bdi; 1141 #if 0 ··· 1144 btrfs_congested_async(info, 0)) 1145 return 1; 1146 #endif 1147 + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1148 if (!device->bdev) 1149 continue; 1150 bdi = blk_get_backing_dev_info(device->bdev); ··· 1163 */ 1164 static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) 1165 { 1166 struct btrfs_device *device; 1167 struct btrfs_fs_info *info; 1168 1169 info = (struct btrfs_fs_info *)bdi->unplug_io_data; 1170 + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1171 if (!device->bdev) 1172 continue; 1173 ··· 1447 INIT_LIST_HEAD(&fs_info->dead_roots); 1448 INIT_LIST_HEAD(&fs_info->hashers); 1449 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1450 spin_lock_init(&fs_info->delalloc_lock); 1451 spin_lock_init(&fs_info->new_trans_lock); 1452 spin_lock_init(&fs_info->ref_cache_lock); ··· 1535 init_waitqueue_head(&fs_info->transaction_throttle); 1536 init_waitqueue_head(&fs_info->transaction_wait); 1537 init_waitqueue_head(&fs_info->async_submit_wait); 1538 1539 __setup_root(4096, 4096, 4096, 4096, tree_root, 1540 fs_info, BTRFS_ROOT_TREE_OBJECTID); ··· 1627 * low idle thresh 1628 */ 1629 fs_info->endio_workers.idle_thresh = 4; 1630 + fs_info->endio_meta_workers.idle_thresh = 4; 1631 + 1632 fs_info->endio_write_workers.idle_thresh = 64; 1633 fs_info->endio_meta_write_workers.idle_thresh = 64; 1634 ··· 1740 fs_info->system_alloc_profile = 
fs_info->metadata_alloc_profile; 1741 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1742 "btrfs-cleaner"); 1743 + if (IS_ERR(fs_info->cleaner_kthread)) 1744 goto fail_csum_root; 1745 1746 fs_info->transaction_kthread = kthread_run(transaction_kthread, 1747 tree_root, 1748 "btrfs-transaction"); 1749 + if (IS_ERR(fs_info->transaction_kthread)) 1750 goto fail_cleaner; 1751 1752 if (btrfs_super_log_root(disk_super) != 0) { ··· 1828 fail_iput: 1829 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 1830 iput(fs_info->btree_inode); 1831 + 1832 btrfs_close_devices(fs_info->fs_devices); 1833 btrfs_mapping_tree_free(&fs_info->mapping_tree); 1834 + bdi_destroy(&fs_info->bdi); 1835 1836 + fail: 1837 kfree(extent_root); 1838 kfree(tree_root); 1839 kfree(fs_info); 1840 kfree(chunk_root); 1841 kfree(dev_root); ··· 1995 1996 int write_all_supers(struct btrfs_root *root, int max_mirrors) 1997 { 1998 struct list_head *head = &root->fs_info->fs_devices->devices; 1999 struct btrfs_device *dev; 2000 struct btrfs_super_block *sb; ··· 2011 2012 sb = &root->fs_info->super_for_commit; 2013 dev_item = &sb->dev_item; 2014 + list_for_each_entry(dev, head, dev_list) { 2015 if (!dev->bdev) { 2016 total_errors++; 2017 continue; ··· 2045 } 2046 2047 total_errors = 0; 2048 + list_for_each_entry(dev, head, dev_list) { 2049 if (!dev->bdev) 2050 continue; 2051 if (!dev->in_fs_metadata || !dev->writeable) ··· 2260 u64 transid = btrfs_header_generation(buf); 2261 struct inode *btree_inode = root->fs_info->btree_inode; 2262 2263 + btrfs_set_lock_blocking(buf); 2264 + 2265 WARN_ON(!btrfs_tree_locked(buf)); 2266 if (transid != root->fs_info->generation) { 2267 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " ··· 2302 int ret; 2303 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 2304 if (ret == 0) 2305 + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 2306 return ret; 2307 } 2308 2309 int btree_lock_page_hook(struct page *page) 2310 { 2311 struct inode *inode = page->mapping->host; 2312 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2313 struct extent_buffer *eb; 2314 unsigned long len; ··· 2324 goto out; 2325 2326 btrfs_tree_lock(eb); 2327 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 2328 btrfs_tree_unlock(eb); 2329 free_extent_buffer(eb); 2330 out:
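
Besides the log-tree refactoring and the list_for_each_entry() conversions, note the corrected kthread error checks above: kthread_run() returns an ERR_PTR() on failure, never NULL, so the old "if (!fs_info->cleaner_kthread)" tests could not trigger. A small sketch of the corrected idiom with made-up demo_* names:

struct demo_ctx {
	struct task_struct *task;
};

static int demo_thread_fn(void *arg)
{
	while (!kthread_should_stop()) {
		/* placeholder: real work would go here */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
	}
	return 0;
}

static int demo_start_thread(struct demo_ctx *ctx)
{
	ctx->task = kthread_run(demo_thread_fn, ctx, "demo-thread");
	if (IS_ERR(ctx->task))		/* kthread_run() never returns NULL */
		return PTR_ERR(ctx->task);
	return 0;
}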

fs/btrfs/disk-io.h | +2

@@ -98,5 +98,7 @@
 			     struct btrfs_fs_info *fs_info);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info);
+int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root);
 int btree_lock_page_hook(struct page *page);
 #endif
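
btrfs_add_log_tree(), exported here and implemented in the disk-io.c hunks above, builds a per-subvolume log root and stores it in root->log_root. A hedged sketch of how a caller might lazily create it under the per-root log_mutex; the wrapper name is hypothetical and the real call site is in the tree-log code:

static int demo_ensure_log_tree(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	int ret = 0;

	mutex_lock(&root->log_mutex);
	if (!root->log_root)
		ret = btrfs_add_log_tree(trans, root);
	mutex_unlock(&root->log_mutex);
	return ret;
}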

fs/btrfs/extent-tree.c | +366 -74
··· 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 - #include <linux/version.h> 23 #include "compat.h" 24 #include "hash.h" 25 #include "crc32c.h" ··· 30 #include "volumes.h" 31 #include "locking.h" 32 #include "ref-cache.h" 33 - #include "compat.h" 34 35 #define PENDING_EXTENT_INSERT 0 36 #define PENDING_EXTENT_DELETE 1 ··· 325 u64 flags) 326 { 327 struct list_head *head = &info->space_info; 328 - struct list_head *cur; 329 struct btrfs_space_info *found; 330 - list_for_each(cur, head) { 331 - found = list_entry(cur, struct btrfs_space_info, list); 332 if (found->flags == flags) 333 return found; 334 } ··· 1522 return ret; 1523 } 1524 1525 - int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1526 - struct extent_buffer *orig_buf, struct extent_buffer *buf, 1527 - u32 *nr_extents) 1528 { 1529 u64 bytenr; 1530 u64 ref_root; 1531 u64 orig_root; 1532 u64 ref_generation; 1533 u64 orig_generation; 1534 u32 nritems; 1535 u32 nr_file_extents = 0; 1536 struct btrfs_key key; ··· 1579 int level; 1580 int ret = 0; 1581 int faili = 0; 1582 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1583 u64, u64, u64, u64, u64, u64, u64, u64); 1584 ··· 1591 1592 nritems = btrfs_header_nritems(buf); 1593 level = btrfs_header_level(buf); 1594 1595 if (root->ref_cows) { 1596 process_func = __btrfs_inc_extent_ref; ··· 1607 process_func = __btrfs_update_extent_ref; 1608 } 1609 1610 for (i = 0; i < nritems; i++) { 1611 cond_resched(); 1612 if (level == 0) { ··· 1628 continue; 1629 1630 nr_file_extents++; 1631 1632 ret = process_func(trans, root, bytenr, 1633 orig_buf->start, buf->start, ··· 1662 key.objectid); 1663 1664 if (ret) { 1665 - faili = i; 1666 WARN_ON(1); 1667 goto fail; 1668 } 1669 } else { 1670 - bytenr = btrfs_node_blockptr(buf, i); 1671 ret = process_func(trans, root, bytenr, 1672 orig_buf->start, buf->start, 1673 orig_root, ref_root, 1674 orig_generation, ref_generation, 1675 level - 1); 1676 if (ret) { 1677 - faili = i; 1678 WARN_ON(1); 1679 goto fail; 1680 } 1681 } 1682 } 1683 out: 1684 if (nr_extents) { 1685 if (level == 0) 1686 *nr_extents = nr_file_extents; ··· 1689 } 1690 return 0; 1691 fail: 1692 WARN_ON(1); 1693 return ret; 1694 } ··· 2233 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2234 &end, EXTENT_WRITEBACK); 2235 if (ret) { 2236 - if (skipped && all && !num_inserts) { 2237 skipped = 0; 2238 search = 0; 2239 continue; ··· 2622 if (ret) { 2623 if (all && skipped && !nr) { 2624 search = 0; 2625 continue; 2626 } 2627 mutex_unlock(&info->extent_ins_mutex); ··· 2776 /* if metadata always pin */ 2777 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 2778 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 2779 - struct btrfs_block_group_cache *cache; 2780 - 2781 - /* btrfs_free_reserved_extent */ 2782 - cache = btrfs_lookup_block_group(root->fs_info, bytenr); 2783 - BUG_ON(!cache); 2784 - btrfs_add_free_space(cache, bytenr, num_bytes); 2785 - put_block_group(cache); 2786 update_reserved_extents(root, bytenr, num_bytes, 0); 2787 return 0; 2788 } ··· 3086 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3087 { 3088 struct btrfs_block_group_cache *cache; 3089 - struct list_head *l; 3090 3091 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3092 (unsigned long long)(info->total_bytes - info->bytes_used - ··· 3093 (info->full) ? 
"" : "not "); 3094 3095 down_read(&info->groups_sem); 3096 - list_for_each(l, &info->block_groups) { 3097 - cache = list_entry(l, struct btrfs_block_group_cache, list); 3098 spin_lock(&cache->lock); 3099 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3100 "%llu pinned %llu reserved\n", ··· 3412 btrfs_set_header_generation(buf, trans->transid); 3413 btrfs_tree_lock(buf); 3414 clean_tree_block(trans, root, buf); 3415 btrfs_set_buffer_uptodate(buf); 3416 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3417 set_extent_dirty(&root->dirty_log_pages, buf->start, 3418 buf->start + buf->len - 1, GFP_NOFS); ··· 3424 buf->start + buf->len - 1, GFP_NOFS); 3425 } 3426 trans->blocks_used++; 3427 return buf; 3428 } 3429 ··· 3462 { 3463 u64 leaf_owner; 3464 u64 leaf_generation; 3465 struct btrfs_key key; 3466 struct btrfs_file_extent_item *fi; 3467 int i; 3468 int nritems; 3469 int ret; 3470 3471 BUG_ON(!btrfs_is_leaf(leaf)); 3472 nritems = btrfs_header_nritems(leaf); 3473 leaf_owner = btrfs_header_owner(leaf); 3474 leaf_generation = btrfs_header_generation(leaf); 3475 3476 for (i = 0; i < nritems; i++) { 3477 u64 disk_bytenr; 3478 cond_resched(); 3479 3480 btrfs_item_key_to_cpu(leaf, &key, i); 3481 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3482 continue; 3483 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 3484 if (btrfs_file_extent_type(leaf, fi) == 3485 BTRFS_FILE_EXTENT_INLINE) 3486 continue; 3487 - /* 3488 - * FIXME make sure to insert a trans record that 3489 - * repeats the snapshot del on crash 3490 - */ 3491 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 3492 if (disk_bytenr == 0) 3493 continue; 3494 3495 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3496 btrfs_file_extent_disk_num_bytes(leaf, fi), ··· 3539 wake_up(&root->fs_info->transaction_throttle); 3540 cond_resched(); 3541 } 3542 return 0; 3543 } 3544 ··· 3550 { 3551 int i; 3552 int ret; 3553 - struct btrfs_extent_info *info = ref->extents; 3554 3555 for (i = 0; i < ref->nritems; i++) { 3556 ret = __btrfs_free_extent(trans, root, info->bytenr, 3557 info->num_bytes, ref->bytenr, 3558 ref->owner, ref->generation, ··· 3582 info++; 3583 } 3584 3585 return 0; 3586 } 3587 ··· 3627 } 3628 3629 /* 3630 * helper function for drop_snapshot, this walks down the tree dropping ref 3631 * counts as it goes. 3632 */ ··· 3787 struct extent_buffer *next; 3788 struct extent_buffer *cur; 3789 struct extent_buffer *parent; 3790 - struct btrfs_leaf_ref *ref; 3791 u32 blocksize; 3792 int ret; 3793 u32 refs; ··· 3813 if (path->slots[*level] >= 3814 btrfs_header_nritems(cur)) 3815 break; 3816 if (*level == 0) { 3817 ret = btrfs_drop_leaf_ref(trans, root, cur); 3818 BUG_ON(ret); 3819 break; 3820 } 3821 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 3822 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3823 blocksize = btrfs_level_size(root, *level - 1); 3824 3825 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3826 BUG_ON(ret); 3827 if (refs != 1) { 3828 parent = path->nodes[*level]; 3829 root_owner = btrfs_header_owner(parent); ··· 3871 3872 continue; 3873 } 3874 - /* 3875 - * at this point, we have a single ref, and since the 3876 - * only place referencing this extent is a dead root 3877 - * the reference count should never go higher. 
3878 - * So, we don't need to check it again 3879 - */ 3880 - if (*level == 1) { 3881 - ref = btrfs_lookup_leaf_ref(root, bytenr); 3882 - if (ref && ref->generation != ptr_gen) { 3883 - btrfs_free_leaf_ref(root, ref); 3884 - ref = NULL; 3885 - } 3886 - if (ref) { 3887 - ret = cache_drop_leaf_ref(trans, root, ref); 3888 - BUG_ON(ret); 3889 - btrfs_remove_leaf_ref(root, ref); 3890 - btrfs_free_leaf_ref(root, ref); 3891 - *level = 0; 3892 - break; 3893 - } 3894 - } 3895 - next = btrfs_find_tree_block(root, bytenr, blocksize); 3896 - if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { 3897 - free_extent_buffer(next); 3898 3899 - next = read_tree_block(root, bytenr, blocksize, 3900 - ptr_gen); 3901 - cond_resched(); 3902 - #if 0 3903 - /* 3904 - * this is a debugging check and can go away 3905 - * the ref should never go all the way down to 1 3906 - * at this point 3907 - */ 3908 - ret = lookup_extent_ref(NULL, root, bytenr, blocksize, 3909 - &refs); 3910 - BUG_ON(ret); 3911 - WARN_ON(refs != 1); 3912 - #endif 3913 - } 3914 WARN_ON(*level <= 0); 3915 if (path->nodes[*level-1]) 3916 free_extent_buffer(path->nodes[*level-1]); ··· 3901 root_owner = btrfs_header_owner(parent); 3902 root_gen = btrfs_header_generation(parent); 3903 3904 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3905 parent->start, root_owner, root_gen, 3906 *level, 1); 3907 free_extent_buffer(path->nodes[*level]); 3908 path->nodes[*level] = NULL; 3909 *level += 1; 3910 BUG_ON(ret); 3911 ··· 3962 3963 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3964 btrfs_tree_lock(next); 3965 3966 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 3967 &refs); ··· 4030 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 4031 struct extent_buffer *node; 4032 struct btrfs_disk_key disk_key; 4033 node = path->nodes[i]; 4034 path->slots[i]++; 4035 *level = i; ··· 4048 return 0; 4049 } else { 4050 struct extent_buffer *parent; 4051 if (path->nodes[*level] == root->node) 4052 parent = path->nodes[*level]; 4053 else ··· 4732 u64 lock_end = 0; 4733 u64 num_bytes; 4734 u64 ext_offset; 4735 - u64 first_pos; 4736 u32 nritems; 4737 int nr_scaned = 0; 4738 int extent_locked = 0; ··· 4740 int ret; 4741 4742 memcpy(&key, leaf_key, sizeof(key)); 4743 - first_pos = INT_LIMIT(loff_t) - extent_key->offset; 4744 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 4745 if (key.objectid < ref_path->owner_objectid || 4746 (key.objectid == ref_path->owner_objectid && ··· 4788 if ((key.objectid > ref_path->owner_objectid) || 4789 (key.objectid == ref_path->owner_objectid && 4790 key.type > BTRFS_EXTENT_DATA_KEY) || 4791 - (key.offset >= first_pos + extent_key->offset)) 4792 break; 4793 } 4794 ··· 4821 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4822 ext_offset = btrfs_file_extent_offset(leaf, fi); 4823 4824 - if (first_pos > key.offset - ext_offset) 4825 - first_pos = key.offset - ext_offset; 4826 4827 if (!extent_locked) { 4828 lock_start = key.offset; ··· 5013 } 5014 skip: 5015 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 5016 - key.offset >= first_pos + extent_key->offset) 5017 break; 5018 5019 cond_resched(); ··· 5067 ref->bytenr = buf->start; 5068 ref->owner = btrfs_header_owner(buf); 5069 ref->generation = btrfs_header_generation(buf); 5070 ret = btrfs_add_leaf_ref(root, ref, 0); 5071 WARN_ON(ret); 5072 btrfs_free_leaf_ref(root, ref); ··· 6247 path = btrfs_alloc_path(); 6248 BUG_ON(!path); 6249 6250 - btrfs_remove_free_space_cache(block_group); 6251 rb_erase(&block_group->cache_node, 6252 
&root->fs_info->block_group_cache_tree); 6253 down_write(&block_group->space_info->groups_sem); 6254 list_del(&block_group->list); 6255 up_write(&block_group->space_info->groups_sem);
··· 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 + #include <linux/sort.h> 23 #include "compat.h" 24 #include "hash.h" 25 #include "crc32c.h" ··· 30 #include "volumes.h" 31 #include "locking.h" 32 #include "ref-cache.h" 33 34 #define PENDING_EXTENT_INSERT 0 35 #define PENDING_EXTENT_DELETE 1 ··· 326 u64 flags) 327 { 328 struct list_head *head = &info->space_info; 329 struct btrfs_space_info *found; 330 + list_for_each_entry(found, head, list) { 331 if (found->flags == flags) 332 return found; 333 } ··· 1525 return ret; 1526 } 1527 1528 + /* when a block goes through cow, we update the reference counts of 1529 + * everything that block points to. The internal pointers of the block 1530 + * can be in just about any order, and it is likely to have clusters of 1531 + * things that are close together and clusters of things that are not. 1532 + * 1533 + * To help reduce the seeks that come with updating all of these reference 1534 + * counts, sort them by byte number before actual updates are done. 1535 + * 1536 + * struct refsort is used to match byte number to slot in the btree block. 1537 + * we sort based on the byte number and then use the slot to actually 1538 + * find the item. 1539 + * 1540 + * struct refsort is smaller than strcut btrfs_item and smaller than 1541 + * struct btrfs_key_ptr. Since we're currently limited to the page size 1542 + * for a btree block, there's no way for a kmalloc of refsorts for a 1543 + * single node to be bigger than a page. 1544 + */ 1545 + struct refsort { 1546 + u64 bytenr; 1547 + u32 slot; 1548 + }; 1549 + 1550 + /* 1551 + * for passing into sort() 1552 + */ 1553 + static int refsort_cmp(const void *a_void, const void *b_void) 1554 + { 1555 + const struct refsort *a = a_void; 1556 + const struct refsort *b = b_void; 1557 + 1558 + if (a->bytenr < b->bytenr) 1559 + return -1; 1560 + if (a->bytenr > b->bytenr) 1561 + return 1; 1562 + return 0; 1563 + } 1564 + 1565 + 1566 + noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, 1567 + struct btrfs_root *root, 1568 + struct extent_buffer *orig_buf, 1569 + struct extent_buffer *buf, u32 *nr_extents) 1570 { 1571 u64 bytenr; 1572 u64 ref_root; 1573 u64 orig_root; 1574 u64 ref_generation; 1575 u64 orig_generation; 1576 + struct refsort *sorted; 1577 u32 nritems; 1578 u32 nr_file_extents = 0; 1579 struct btrfs_key key; ··· 1542 int level; 1543 int ret = 0; 1544 int faili = 0; 1545 + int refi = 0; 1546 + int slot; 1547 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1548 u64, u64, u64, u64, u64, u64, u64, u64); 1549 ··· 1552 1553 nritems = btrfs_header_nritems(buf); 1554 level = btrfs_header_level(buf); 1555 + 1556 + sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); 1557 + BUG_ON(!sorted); 1558 1559 if (root->ref_cows) { 1560 process_func = __btrfs_inc_extent_ref; ··· 1565 process_func = __btrfs_update_extent_ref; 1566 } 1567 1568 + /* 1569 + * we make two passes through the items. In the first pass we 1570 + * only record the byte number and slot. 
Then we sort based on 1571 + * byte number and do the actual work based on the sorted results 1572 + */ 1573 for (i = 0; i < nritems; i++) { 1574 cond_resched(); 1575 if (level == 0) { ··· 1581 continue; 1582 1583 nr_file_extents++; 1584 + sorted[refi].bytenr = bytenr; 1585 + sorted[refi].slot = i; 1586 + refi++; 1587 + } else { 1588 + bytenr = btrfs_node_blockptr(buf, i); 1589 + sorted[refi].bytenr = bytenr; 1590 + sorted[refi].slot = i; 1591 + refi++; 1592 + } 1593 + } 1594 + /* 1595 + * if refi == 0, we didn't actually put anything into the sorted 1596 + * array and we're done 1597 + */ 1598 + if (refi == 0) 1599 + goto out; 1600 + 1601 + sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 1602 + 1603 + for (i = 0; i < refi; i++) { 1604 + cond_resched(); 1605 + slot = sorted[i].slot; 1606 + bytenr = sorted[i].bytenr; 1607 + 1608 + if (level == 0) { 1609 + btrfs_item_key_to_cpu(buf, &key, slot); 1610 1611 ret = process_func(trans, root, bytenr, 1612 orig_buf->start, buf->start, ··· 1589 key.objectid); 1590 1591 if (ret) { 1592 + faili = slot; 1593 WARN_ON(1); 1594 goto fail; 1595 } 1596 } else { 1597 ret = process_func(trans, root, bytenr, 1598 orig_buf->start, buf->start, 1599 orig_root, ref_root, 1600 orig_generation, ref_generation, 1601 level - 1); 1602 if (ret) { 1603 + faili = slot; 1604 WARN_ON(1); 1605 goto fail; 1606 } 1607 } 1608 } 1609 out: 1610 + kfree(sorted); 1611 if (nr_extents) { 1612 if (level == 0) 1613 *nr_extents = nr_file_extents; ··· 1616 } 1617 return 0; 1618 fail: 1619 + kfree(sorted); 1620 WARN_ON(1); 1621 return ret; 1622 } ··· 2159 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2160 &end, EXTENT_WRITEBACK); 2161 if (ret) { 2162 + if (skipped && all && !num_inserts && 2163 + list_empty(&update_list)) { 2164 skipped = 0; 2165 search = 0; 2166 continue; ··· 2547 if (ret) { 2548 if (all && skipped && !nr) { 2549 search = 0; 2550 + skipped = 0; 2551 continue; 2552 } 2553 mutex_unlock(&info->extent_ins_mutex); ··· 2700 /* if metadata always pin */ 2701 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 2702 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 2703 + mutex_lock(&root->fs_info->pinned_mutex); 2704 + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2705 + mutex_unlock(&root->fs_info->pinned_mutex); 2706 update_reserved_extents(root, bytenr, num_bytes, 0); 2707 return 0; 2708 } ··· 3014 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3015 { 3016 struct btrfs_block_group_cache *cache; 3017 3018 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3019 (unsigned long long)(info->total_bytes - info->bytes_used - ··· 3022 (info->full) ? 
"" : "not "); 3023 3024 down_read(&info->groups_sem); 3025 + list_for_each_entry(cache, &info->block_groups, list) { 3026 spin_lock(&cache->lock); 3027 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3028 "%llu pinned %llu reserved\n", ··· 3342 btrfs_set_header_generation(buf, trans->transid); 3343 btrfs_tree_lock(buf); 3344 clean_tree_block(trans, root, buf); 3345 + 3346 + btrfs_set_lock_blocking(buf); 3347 btrfs_set_buffer_uptodate(buf); 3348 + 3349 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3350 set_extent_dirty(&root->dirty_log_pages, buf->start, 3351 buf->start + buf->len - 1, GFP_NOFS); ··· 3351 buf->start + buf->len - 1, GFP_NOFS); 3352 } 3353 trans->blocks_used++; 3354 + /* this returns a buffer locked for blocking */ 3355 return buf; 3356 } 3357 ··· 3388 { 3389 u64 leaf_owner; 3390 u64 leaf_generation; 3391 + struct refsort *sorted; 3392 struct btrfs_key key; 3393 struct btrfs_file_extent_item *fi; 3394 int i; 3395 int nritems; 3396 int ret; 3397 + int refi = 0; 3398 + int slot; 3399 3400 BUG_ON(!btrfs_is_leaf(leaf)); 3401 nritems = btrfs_header_nritems(leaf); 3402 leaf_owner = btrfs_header_owner(leaf); 3403 leaf_generation = btrfs_header_generation(leaf); 3404 3405 + sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 3406 + /* we do this loop twice. The first time we build a list 3407 + * of the extents we have a reference on, then we sort the list 3408 + * by bytenr. The second time around we actually do the 3409 + * extent freeing. 3410 + */ 3411 for (i = 0; i < nritems; i++) { 3412 u64 disk_bytenr; 3413 cond_resched(); 3414 3415 btrfs_item_key_to_cpu(leaf, &key, i); 3416 + 3417 + /* only extents have references, skip everything else */ 3418 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3419 continue; 3420 + 3421 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 3422 + 3423 + /* inline extents live in the btree, they don't have refs */ 3424 if (btrfs_file_extent_type(leaf, fi) == 3425 BTRFS_FILE_EXTENT_INLINE) 3426 continue; 3427 + 3428 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 3429 + 3430 + /* holes don't have refs */ 3431 if (disk_bytenr == 0) 3432 continue; 3433 + 3434 + sorted[refi].bytenr = disk_bytenr; 3435 + sorted[refi].slot = i; 3436 + refi++; 3437 + } 3438 + 3439 + if (refi == 0) 3440 + goto out; 3441 + 3442 + sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 3443 + 3444 + for (i = 0; i < refi; i++) { 3445 + u64 disk_bytenr; 3446 + 3447 + disk_bytenr = sorted[i].bytenr; 3448 + slot = sorted[i].slot; 3449 + 3450 + cond_resched(); 3451 + 3452 + btrfs_item_key_to_cpu(leaf, &key, slot); 3453 + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3454 + continue; 3455 + 3456 + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3457 3458 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3459 btrfs_file_extent_disk_num_bytes(leaf, fi), ··· 3428 wake_up(&root->fs_info->transaction_throttle); 3429 cond_resched(); 3430 } 3431 + out: 3432 + kfree(sorted); 3433 return 0; 3434 } 3435 ··· 3437 { 3438 int i; 3439 int ret; 3440 + struct btrfs_extent_info *info; 3441 + struct refsort *sorted; 3442 3443 + if (ref->nritems == 0) 3444 + return 0; 3445 + 3446 + sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 3447 for (i = 0; i < ref->nritems; i++) { 3448 + sorted[i].bytenr = ref->extents[i].bytenr; 3449 + sorted[i].slot = i; 3450 + } 3451 + sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 3452 + 3453 + /* 3454 + * the items in the ref were sorted when the ref was 
inserted 3455 + * into the ref cache, so this is already in order 3456 + */ 3457 + for (i = 0; i < ref->nritems; i++) { 3458 + info = ref->extents + sorted[i].slot; 3459 ret = __btrfs_free_extent(trans, root, info->bytenr, 3460 info->num_bytes, ref->bytenr, 3461 ref->owner, ref->generation, ··· 3453 info++; 3454 } 3455 3456 + kfree(sorted); 3457 return 0; 3458 } 3459 ··· 3497 } 3498 3499 /* 3500 + * this is used while deleting old snapshots, and it drops the refs 3501 + * on a whole subtree starting from a level 1 node. 3502 + * 3503 + * The idea is to sort all the leaf pointers, and then drop the 3504 + * ref on all the leaves in order. Most of the time the leaves 3505 + * will have ref cache entries, so no leaf IOs will be required to 3506 + * find the extents they have references on. 3507 + * 3508 + * For each leaf, any references it has are also dropped in order 3509 + * 3510 + * This ends up dropping the references in something close to optimal 3511 + * order for reading and modifying the extent allocation tree. 3512 + */ 3513 + static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 3514 + struct btrfs_root *root, 3515 + struct btrfs_path *path) 3516 + { 3517 + u64 bytenr; 3518 + u64 root_owner; 3519 + u64 root_gen; 3520 + struct extent_buffer *eb = path->nodes[1]; 3521 + struct extent_buffer *leaf; 3522 + struct btrfs_leaf_ref *ref; 3523 + struct refsort *sorted = NULL; 3524 + int nritems = btrfs_header_nritems(eb); 3525 + int ret; 3526 + int i; 3527 + int refi = 0; 3528 + int slot = path->slots[1]; 3529 + u32 blocksize = btrfs_level_size(root, 0); 3530 + u32 refs; 3531 + 3532 + if (nritems == 0) 3533 + goto out; 3534 + 3535 + root_owner = btrfs_header_owner(eb); 3536 + root_gen = btrfs_header_generation(eb); 3537 + sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 3538 + 3539 + /* 3540 + * step one, sort all the leaf pointers so we don't scribble 3541 + * randomly into the extent allocation tree 3542 + */ 3543 + for (i = slot; i < nritems; i++) { 3544 + sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 3545 + sorted[refi].slot = i; 3546 + refi++; 3547 + } 3548 + 3549 + /* 3550 + * nritems won't be zero, but if we're picking up drop_snapshot 3551 + * after a crash, slot might be > 0, so double check things 3552 + * just in case. 3553 + */ 3554 + if (refi == 0) 3555 + goto out; 3556 + 3557 + sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 3558 + 3559 + /* 3560 + * the first loop frees everything the leaves point to 3561 + */ 3562 + for (i = 0; i < refi; i++) { 3563 + u64 ptr_gen; 3564 + 3565 + bytenr = sorted[i].bytenr; 3566 + 3567 + /* 3568 + * check the reference count on this leaf. If it is > 1 3569 + * we just decrement it below and don't update any 3570 + * of the refs the leaf points to. 3571 + */ 3572 + ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3573 + BUG_ON(ret); 3574 + if (refs != 1) 3575 + continue; 3576 + 3577 + ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 3578 + 3579 + /* 3580 + * the leaf only had one reference, which means the 3581 + * only thing pointing to this leaf is the snapshot 3582 + * we're deleting. It isn't possible for the reference 3583 + * count to increase again later 3584 + * 3585 + * The reference cache is checked for the leaf, 3586 + * and if found we'll be able to drop any refs held by 3587 + * the leaf without needing to read it in. 
3588 + */ 3589 + ref = btrfs_lookup_leaf_ref(root, bytenr); 3590 + if (ref && ref->generation != ptr_gen) { 3591 + btrfs_free_leaf_ref(root, ref); 3592 + ref = NULL; 3593 + } 3594 + if (ref) { 3595 + ret = cache_drop_leaf_ref(trans, root, ref); 3596 + BUG_ON(ret); 3597 + btrfs_remove_leaf_ref(root, ref); 3598 + btrfs_free_leaf_ref(root, ref); 3599 + } else { 3600 + /* 3601 + * the leaf wasn't in the reference cache, so 3602 + * we have to read it. 3603 + */ 3604 + leaf = read_tree_block(root, bytenr, blocksize, 3605 + ptr_gen); 3606 + ret = btrfs_drop_leaf_ref(trans, root, leaf); 3607 + BUG_ON(ret); 3608 + free_extent_buffer(leaf); 3609 + } 3610 + atomic_inc(&root->fs_info->throttle_gen); 3611 + wake_up(&root->fs_info->transaction_throttle); 3612 + cond_resched(); 3613 + } 3614 + 3615 + /* 3616 + * run through the loop again to free the refs on the leaves. 3617 + * This is faster than doing it in the loop above because 3618 + * the leaves are likely to be clustered together. We end up 3619 + * working in nice chunks on the extent allocation tree. 3620 + */ 3621 + for (i = 0; i < refi; i++) { 3622 + bytenr = sorted[i].bytenr; 3623 + ret = __btrfs_free_extent(trans, root, bytenr, 3624 + blocksize, eb->start, 3625 + root_owner, root_gen, 0, 1); 3626 + BUG_ON(ret); 3627 + 3628 + atomic_inc(&root->fs_info->throttle_gen); 3629 + wake_up(&root->fs_info->transaction_throttle); 3630 + cond_resched(); 3631 + } 3632 + out: 3633 + kfree(sorted); 3634 + 3635 + /* 3636 + * update the path to show we've processed the entire level 1 3637 + * node. This will get saved into the root's drop_snapshot_progress 3638 + * field so these drops are not repeated again if this transaction 3639 + * commits. 3640 + */ 3641 + path->slots[1] = nritems; 3642 + return 0; 3643 + } 3644 + 3645 + /* 3646 * helper function for drop_snapshot, this walks down the tree dropping ref 3647 * counts as it goes. 3648 */ ··· 3511 struct extent_buffer *next; 3512 struct extent_buffer *cur; 3513 struct extent_buffer *parent; 3514 u32 blocksize; 3515 int ret; 3516 u32 refs; ··· 3538 if (path->slots[*level] >= 3539 btrfs_header_nritems(cur)) 3540 break; 3541 + 3542 + /* the new code goes down to level 1 and does all the 3543 + * leaves pointed to that node in bulk. So, this check 3544 + * for level 0 will always be false. 3545 + * 3546 + * But, the disk format allows the drop_snapshot_progress 3547 + * field in the root to leave things in a state where 3548 + * a leaf will need cleaning up here. If someone crashes 3549 + * with the old code and then boots with the new code, 3550 + * we might find a leaf here. 3551 + */ 3552 if (*level == 0) { 3553 ret = btrfs_drop_leaf_ref(trans, root, cur); 3554 BUG_ON(ret); 3555 break; 3556 } 3557 + 3558 + /* 3559 + * once we get to level one, process the whole node 3560 + * at once, including everything below it. 3561 + */ 3562 + if (*level == 1) { 3563 + ret = drop_level_one_refs(trans, root, path); 3564 + BUG_ON(ret); 3565 + break; 3566 + } 3567 + 3568 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 3569 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3570 blocksize = btrfs_level_size(root, *level - 1); 3571 3572 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3573 BUG_ON(ret); 3574 + 3575 + /* 3576 + * if there is more than one reference, we don't need 3577 + * to read that node to drop any references it has. We 3578 + * just drop the ref we hold on that node and move on to the 3579 + * next slot in this level. 
3580 + */ 3581 if (refs != 1) { 3582 parent = path->nodes[*level]; 3583 root_owner = btrfs_header_owner(parent); ··· 3567 3568 continue; 3569 } 3570 3571 + /* 3572 + * we need to keep freeing things in the next level down. 3573 + * read the block and loop around to process it 3574 + */ 3575 + next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3576 WARN_ON(*level <= 0); 3577 if (path->nodes[*level-1]) 3578 free_extent_buffer(path->nodes[*level-1]); ··· 3631 root_owner = btrfs_header_owner(parent); 3632 root_gen = btrfs_header_generation(parent); 3633 3634 + /* 3635 + * cleanup and free the reference on the last node 3636 + * we processed 3637 + */ 3638 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3639 parent->start, root_owner, root_gen, 3640 *level, 1); 3641 free_extent_buffer(path->nodes[*level]); 3642 path->nodes[*level] = NULL; 3643 + 3644 *level += 1; 3645 BUG_ON(ret); 3646 ··· 3687 3688 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3689 btrfs_tree_lock(next); 3690 + btrfs_set_lock_blocking(next); 3691 3692 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 3693 &refs); ··· 3754 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 3755 struct extent_buffer *node; 3756 struct btrfs_disk_key disk_key; 3757 + 3758 + /* 3759 + * there is more work to do in this level. 3760 + * Update the drop_progress marker to reflect 3761 + * the work we've done so far, and then bump 3762 + * the slot number 3763 + */ 3764 node = path->nodes[i]; 3765 path->slots[i]++; 3766 *level = i; ··· 3765 return 0; 3766 } else { 3767 struct extent_buffer *parent; 3768 + 3769 + /* 3770 + * this whole node is done, free our reference 3771 + * on it and go up one level 3772 + */ 3773 if (path->nodes[*level] == root->node) 3774 parent = path->nodes[*level]; 3775 else ··· 4444 u64 lock_end = 0; 4445 u64 num_bytes; 4446 u64 ext_offset; 4447 + u64 search_end = (u64)-1; 4448 u32 nritems; 4449 int nr_scaned = 0; 4450 int extent_locked = 0; ··· 4452 int ret; 4453 4454 memcpy(&key, leaf_key, sizeof(key)); 4455 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 4456 if (key.objectid < ref_path->owner_objectid || 4457 (key.objectid == ref_path->owner_objectid && ··· 4501 if ((key.objectid > ref_path->owner_objectid) || 4502 (key.objectid == ref_path->owner_objectid && 4503 key.type > BTRFS_EXTENT_DATA_KEY) || 4504 + key.offset >= search_end) 4505 break; 4506 } 4507 ··· 4534 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4535 ext_offset = btrfs_file_extent_offset(leaf, fi); 4536 4537 + if (search_end == (u64)-1) { 4538 + search_end = key.offset - ext_offset + 4539 + btrfs_file_extent_ram_bytes(leaf, fi); 4540 + } 4541 4542 if (!extent_locked) { 4543 lock_start = key.offset; ··· 4724 } 4725 skip: 4726 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 4727 + key.offset >= search_end) 4728 break; 4729 4730 cond_resched(); ··· 4778 ref->bytenr = buf->start; 4779 ref->owner = btrfs_header_owner(buf); 4780 ref->generation = btrfs_header_generation(buf); 4781 + 4782 ret = btrfs_add_leaf_ref(root, ref, 0); 4783 WARN_ON(ret); 4784 btrfs_free_leaf_ref(root, ref); ··· 5957 path = btrfs_alloc_path(); 5958 BUG_ON(!path); 5959 5960 + spin_lock(&root->fs_info->block_group_cache_lock); 5961 rb_erase(&block_group->cache_node, 5962 &root->fs_info->block_group_cache_tree); 5963 + spin_unlock(&root->fs_info->block_group_cache_lock); 5964 + btrfs_remove_free_space_cache(block_group); 5965 down_write(&block_group->space_info->groups_sem); 5966 list_del(&block_group->list); 5967 
up_write(&block_group->space_info->groups_sem);
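The extent-tree.c hunks above sort reference updates by disk byte number before touching the extent allocation tree, so the updates walk that tree roughly front to back instead of at random. A minimal standalone sketch of the two-pass pattern follows; struct refsort and refsort_cmp() are taken from the patch itself, while update_refs_in_order(), get_bytenr() and process_ref() are hypothetical stand-ins for the real btrfs callbacks (btrfs_node_blockptr(), __btrfs_inc_extent_ref(), __btrfs_free_extent()).

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/sort.h>

/* same struct and comparator as the patch above */
struct refsort {
	u64 bytenr;
	u32 slot;
};

static int refsort_cmp(const void *a_void, const void *b_void)
{
	const struct refsort *a = a_void;
	const struct refsort *b = b_void;

	if (a->bytenr < b->bytenr)
		return -1;
	if (a->bytenr > b->bytenr)
		return 1;
	return 0;
}

/*
 * pass 1 records (bytenr, slot) pairs, then we sort by bytenr, then
 * pass 2 does the expensive per-reference work in sorted order.
 * get_bytenr() and process_ref() are placeholder callbacks.
 */
static int update_refs_in_order(void *ctx, u32 nritems,
				u64 (*get_bytenr)(void *ctx, u32 slot),
				int (*process_ref)(void *ctx, u32 slot))
{
	struct refsort *sorted;
	u32 i;
	int ret = 0;

	sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
	if (!sorted)
		return -ENOMEM;

	for (i = 0; i < nritems; i++) {
		sorted[i].bytenr = get_bytenr(ctx, i);
		sorted[i].slot = i;
	}

	sort(sorted, nritems, sizeof(struct refsort), refsort_cmp, NULL);

	for (i = 0; i < nritems; i++) {
		ret = process_ref(ctx, sorted[i].slot);
		if (ret)
			break;
	}

	kfree(sorted);
	return ret;
}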
+109 -23
fs/btrfs/extent_io.c
··· 9 #include <linux/spinlock.h> 10 #include <linux/blkdev.h> 11 #include <linux/swap.h> 12 - #include <linux/version.h> 13 #include <linux/writeback.h> 14 #include <linux/pagevec.h> 15 #include "extent_io.h" ··· 30 static LIST_HEAD(states); 31 32 #define LEAK_DEBUG 0 33 - #ifdef LEAK_DEBUG 34 static DEFINE_SPINLOCK(leak_lock); 35 #endif 36 ··· 119 static struct extent_state *alloc_extent_state(gfp_t mask) 120 { 121 struct extent_state *state; 122 - #ifdef LEAK_DEBUG 123 unsigned long flags; 124 #endif 125 ··· 129 state->state = 0; 130 state->private = 0; 131 state->tree = NULL; 132 - #ifdef LEAK_DEBUG 133 spin_lock_irqsave(&leak_lock, flags); 134 list_add(&state->leak_list, &states); 135 spin_unlock_irqrestore(&leak_lock, flags); ··· 144 if (!state) 145 return; 146 if (atomic_dec_and_test(&state->refs)) { 147 - #ifdef LEAK_DEBUG 148 unsigned long flags; 149 #endif 150 WARN_ON(state->tree); 151 - #ifdef LEAK_DEBUG 152 spin_lock_irqsave(&leak_lock, flags); 153 list_del(&state->leak_list); 154 spin_unlock_irqrestore(&leak_lock, flags); ··· 2377 int scanned = 0; 2378 int range_whole = 0; 2379 2380 - if (wbc->nonblocking && bdi_write_congested(bdi)) { 2381 - wbc->encountered_congestion = 1; 2382 - return 0; 2383 - } 2384 - 2385 pagevec_init(&pvec, 0); 2386 if (wbc->range_cyclic) { 2387 index = mapping->writeback_index; /* Start from prev offset */ ··· 2849 return sector; 2850 } 2851 2852 static inline struct page *extent_buffer_page(struct extent_buffer *eb, 2853 unsigned long i) 2854 { ··· 2978 gfp_t mask) 2979 { 2980 struct extent_buffer *eb = NULL; 2981 - #ifdef LEAK_DEBUG 2982 unsigned long flags; 2983 #endif 2984 2985 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2986 eb->start = start; 2987 eb->len = len; 2988 - mutex_init(&eb->mutex); 2989 - #ifdef LEAK_DEBUG 2990 spin_lock_irqsave(&leak_lock, flags); 2991 list_add(&eb->leak_list, &buffers); 2992 spin_unlock_irqrestore(&leak_lock, flags); ··· 3000 3001 static void __free_extent_buffer(struct extent_buffer *eb) 3002 { 3003 - #ifdef LEAK_DEBUG 3004 unsigned long flags; 3005 spin_lock_irqsave(&leak_lock, flags); 3006 list_del(&eb->leak_list); ··· 3068 unlock_page(p); 3069 } 3070 if (uptodate) 3071 - eb->flags |= EXTENT_UPTODATE; 3072 - eb->flags |= EXTENT_BUFFER_FILLED; 3073 3074 spin_lock(&tree->buffer_lock); 3075 exists = buffer_tree_insert(tree, start, &eb->rb_node); ··· 3222 unsigned long num_pages; 3223 3224 num_pages = num_extent_pages(eb->start, eb->len); 3225 - eb->flags &= ~EXTENT_UPTODATE; 3226 3227 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3228 GFP_NOFS); ··· 3293 struct page *page; 3294 int pg_uptodate = 1; 3295 3296 - if (eb->flags & EXTENT_UPTODATE) 3297 return 1; 3298 3299 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3329 struct bio *bio = NULL; 3330 unsigned long bio_flags = 0; 3331 3332 - if (eb->flags & EXTENT_UPTODATE) 3333 return 0; 3334 3335 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3360 } 3361 if (all_uptodate) { 3362 if (start_i == 0) 3363 - eb->flags |= EXTENT_UPTODATE; 3364 goto unlock_exit; 3365 } 3366 ··· 3396 } 3397 3398 if (!ret) 3399 - eb->flags |= EXTENT_UPTODATE; 3400 return ret; 3401 3402 unlock_exit: ··· 3493 unmap_extent_buffer(eb, eb->map_token, km); 3494 eb->map_token = NULL; 3495 save = 1; 3496 - WARN_ON(!mutex_is_locked(&eb->mutex)); 3497 } 3498 err = map_private_extent_buffer(eb, start, min_len, token, map, 3499 map_start, map_len, km);
··· 9 #include <linux/spinlock.h> 10 #include <linux/blkdev.h> 11 #include <linux/swap.h> 12 #include <linux/writeback.h> 13 #include <linux/pagevec.h> 14 #include "extent_io.h" ··· 31 static LIST_HEAD(states); 32 33 #define LEAK_DEBUG 0 34 + #if LEAK_DEBUG 35 static DEFINE_SPINLOCK(leak_lock); 36 #endif 37 ··· 120 static struct extent_state *alloc_extent_state(gfp_t mask) 121 { 122 struct extent_state *state; 123 + #if LEAK_DEBUG 124 unsigned long flags; 125 #endif 126 ··· 130 state->state = 0; 131 state->private = 0; 132 state->tree = NULL; 133 + #if LEAK_DEBUG 134 spin_lock_irqsave(&leak_lock, flags); 135 list_add(&state->leak_list, &states); 136 spin_unlock_irqrestore(&leak_lock, flags); ··· 145 if (!state) 146 return; 147 if (atomic_dec_and_test(&state->refs)) { 148 + #if LEAK_DEBUG 149 unsigned long flags; 150 #endif 151 WARN_ON(state->tree); 152 + #if LEAK_DEBUG 153 spin_lock_irqsave(&leak_lock, flags); 154 list_del(&state->leak_list); 155 spin_unlock_irqrestore(&leak_lock, flags); ··· 2378 int scanned = 0; 2379 int range_whole = 0; 2380 2381 pagevec_init(&pvec, 0); 2382 if (wbc->range_cyclic) { 2383 index = mapping->writeback_index; /* Start from prev offset */ ··· 2855 return sector; 2856 } 2857 2858 + int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2859 + __u64 start, __u64 len, get_extent_t *get_extent) 2860 + { 2861 + int ret; 2862 + u64 off = start; 2863 + u64 max = start + len; 2864 + u32 flags = 0; 2865 + u64 disko = 0; 2866 + struct extent_map *em = NULL; 2867 + int end = 0; 2868 + u64 em_start = 0, em_len = 0; 2869 + unsigned long emflags; 2870 + ret = 0; 2871 + 2872 + if (len == 0) 2873 + return -EINVAL; 2874 + 2875 + lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2876 + GFP_NOFS); 2877 + em = get_extent(inode, NULL, 0, off, max - off, 0); 2878 + if (!em) 2879 + goto out; 2880 + if (IS_ERR(em)) { 2881 + ret = PTR_ERR(em); 2882 + goto out; 2883 + } 2884 + while (!end) { 2885 + off = em->start + em->len; 2886 + if (off >= max) 2887 + end = 1; 2888 + 2889 + em_start = em->start; 2890 + em_len = em->len; 2891 + 2892 + disko = 0; 2893 + flags = 0; 2894 + 2895 + switch (em->block_start) { 2896 + case EXTENT_MAP_LAST_BYTE: 2897 + end = 1; 2898 + flags |= FIEMAP_EXTENT_LAST; 2899 + break; 2900 + case EXTENT_MAP_HOLE: 2901 + flags |= FIEMAP_EXTENT_UNWRITTEN; 2902 + break; 2903 + case EXTENT_MAP_INLINE: 2904 + flags |= (FIEMAP_EXTENT_DATA_INLINE | 2905 + FIEMAP_EXTENT_NOT_ALIGNED); 2906 + break; 2907 + case EXTENT_MAP_DELALLOC: 2908 + flags |= (FIEMAP_EXTENT_DELALLOC | 2909 + FIEMAP_EXTENT_UNKNOWN); 2910 + break; 2911 + default: 2912 + disko = em->block_start; 2913 + break; 2914 + } 2915 + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2916 + flags |= FIEMAP_EXTENT_ENCODED; 2917 + 2918 + emflags = em->flags; 2919 + free_extent_map(em); 2920 + em = NULL; 2921 + 2922 + if (!end) { 2923 + em = get_extent(inode, NULL, 0, off, max - off, 0); 2924 + if (!em) 2925 + goto out; 2926 + if (IS_ERR(em)) { 2927 + ret = PTR_ERR(em); 2928 + goto out; 2929 + } 2930 + emflags = em->flags; 2931 + } 2932 + if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { 2933 + flags |= FIEMAP_EXTENT_LAST; 2934 + end = 1; 2935 + } 2936 + 2937 + ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 2938 + em_len, flags); 2939 + if (ret) 2940 + goto out_free; 2941 + } 2942 + out_free: 2943 + free_extent_map(em); 2944 + out: 2945 + unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2946 + GFP_NOFS); 2947 + return ret; 2948 + } 2949 + 2950 static inline struct page 
*extent_buffer_page(struct extent_buffer *eb, 2951 unsigned long i) 2952 { ··· 2892 gfp_t mask) 2893 { 2894 struct extent_buffer *eb = NULL; 2895 + #if LEAK_DEBUG 2896 unsigned long flags; 2897 #endif 2898 2899 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2900 eb->start = start; 2901 eb->len = len; 2902 + spin_lock_init(&eb->lock); 2903 + init_waitqueue_head(&eb->lock_wq); 2904 + 2905 + #if LEAK_DEBUG 2906 spin_lock_irqsave(&leak_lock, flags); 2907 list_add(&eb->leak_list, &buffers); 2908 spin_unlock_irqrestore(&leak_lock, flags); ··· 2912 2913 static void __free_extent_buffer(struct extent_buffer *eb) 2914 { 2915 + #if LEAK_DEBUG 2916 unsigned long flags; 2917 spin_lock_irqsave(&leak_lock, flags); 2918 list_del(&eb->leak_list); ··· 2980 unlock_page(p); 2981 } 2982 if (uptodate) 2983 + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 2984 2985 spin_lock(&tree->buffer_lock); 2986 exists = buffer_tree_insert(tree, start, &eb->rb_node); ··· 3135 unsigned long num_pages; 3136 3137 num_pages = num_extent_pages(eb->start, eb->len); 3138 + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3139 3140 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3141 GFP_NOFS); ··· 3206 struct page *page; 3207 int pg_uptodate = 1; 3208 3209 + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3210 return 1; 3211 3212 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3242 struct bio *bio = NULL; 3243 unsigned long bio_flags = 0; 3244 3245 + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3246 return 0; 3247 3248 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3273 } 3274 if (all_uptodate) { 3275 if (start_i == 0) 3276 + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3277 goto unlock_exit; 3278 } 3279 ··· 3309 } 3310 3311 if (!ret) 3312 + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3313 return ret; 3314 3315 unlock_exit: ··· 3406 unmap_extent_buffer(eb, eb->map_token, km); 3407 eb->map_token = NULL; 3408 save = 1; 3409 } 3410 err = map_private_extent_buffer(eb, start, min_len, token, map, 3411 map_start, map_len, km);
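The LEAK_DEBUG hunks above switch from #ifdef to #if, which is what actually disables the leak tracking while LEAK_DEBUG is defined to 0. A small self-contained illustration of the preprocessor difference (plain userspace C, not part of the patch):

#include <stdio.h>

#define LEAK_DEBUG 0

int main(void)
{
#ifdef LEAK_DEBUG
	/* this branch IS compiled: #ifdef only asks "is the macro defined?" */
	printf("ifdef: leak tracking would still be built in\n");
#endif

#if LEAK_DEBUG
	/* this branch is NOT compiled: #if tests the value, which is 0 */
	printf("if: leak tracking enabled\n");
#endif
	return 0;
}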
+16 -2
fs/btrfs/extent_io.h
··· 22 /* flags for bio submission */ 23 #define EXTENT_BIO_COMPRESSED 1 24 25 /* 26 * page->private values. Every page that is controlled by the extent 27 * map has page->private set to one. ··· 99 unsigned long map_start; 100 unsigned long map_len; 101 struct page *first_page; 102 atomic_t refs; 103 - int flags; 104 struct list_head leak_list; 105 struct rb_node rb_node; 106 - struct mutex mutex; 107 }; 108 109 struct extent_map_tree; ··· 205 unsigned from, unsigned to); 206 sector_t extent_bmap(struct address_space *mapping, sector_t iblock, 207 get_extent_t *get_extent); 208 int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); 209 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 210 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
··· 22 /* flags for bio submission */ 23 #define EXTENT_BIO_COMPRESSED 1 24 25 + /* these are bit numbers for test/set bit */ 26 + #define EXTENT_BUFFER_UPTODATE 0 27 + #define EXTENT_BUFFER_BLOCKING 1 28 + 29 /* 30 * page->private values. Every page that is controlled by the extent 31 * map has page->private set to one. ··· 95 unsigned long map_start; 96 unsigned long map_len; 97 struct page *first_page; 98 + unsigned long bflags; 99 atomic_t refs; 100 struct list_head leak_list; 101 struct rb_node rb_node; 102 + 103 + /* the spinlock is used to protect most operations */ 104 + spinlock_t lock; 105 + 106 + /* 107 + * when we keep the lock held while blocking, waiters go onto 108 + * the wq 109 + */ 110 + wait_queue_head_t lock_wq; 111 }; 112 113 struct extent_map_tree; ··· 193 unsigned from, unsigned to); 194 sector_t extent_bmap(struct address_space *mapping, sector_t iblock, 195 get_extent_t *get_extent); 196 + int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 197 + __u64 start, __u64 len, get_extent_t *get_extent); 198 int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); 199 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 200 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
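The EXTENT_BUFFER_UPTODATE and EXTENT_BUFFER_BLOCKING values above are bit numbers, not masks, so callers use the atomic bitops on eb->bflags rather than the |= and & arithmetic that the old int flags field allowed. A minimal sketch, assuming this header is included; eb_set_uptodate() and eb_uptodate() are hypothetical helpers, not functions from the patch.

#include <linux/bitops.h>
#include "extent_io.h"

/* hypothetical helpers showing bit-number usage on eb->bflags */
static void eb_set_uptodate(struct extent_buffer *eb, int uptodate)
{
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	else
		clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}

static int eb_uptodate(struct extent_buffer *eb)
{
	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}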
-1
fs/btrfs/extent_map.c
··· 3 #include <linux/slab.h> 4 #include <linux/module.h> 5 #include <linux/spinlock.h> 6 - #include <linux/version.h> 7 #include <linux/hardirq.h> 8 #include "extent_map.h" 9
··· 3 #include <linux/slab.h> 4 #include <linux/module.h> 5 #include <linux/spinlock.h> 6 #include <linux/hardirq.h> 7 #include "extent_map.h" 8
+2 -3
fs/btrfs/file.c
··· 29 #include <linux/writeback.h> 30 #include <linux/statfs.h> 31 #include <linux/compat.h> 32 - #include <linux/version.h> 33 #include "ctree.h" 34 #include "disk-io.h" 35 #include "transaction.h" ··· 1214 } 1215 mutex_unlock(&root->fs_info->trans_mutex); 1216 1217 - root->fs_info->tree_log_batch++; 1218 filemap_fdatawrite(inode->i_mapping); 1219 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1220 - root->fs_info->tree_log_batch++; 1221 1222 /* 1223 * ok we haven't committed the transaction yet, lets do a commit
··· 29 #include <linux/writeback.h> 30 #include <linux/statfs.h> 31 #include <linux/compat.h> 32 #include "ctree.h" 33 #include "disk-io.h" 34 #include "transaction.h" ··· 1215 } 1216 mutex_unlock(&root->fs_info->trans_mutex); 1217 1218 + root->log_batch++; 1219 filemap_fdatawrite(inode->i_mapping); 1220 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1221 + root->log_batch++; 1222 1223 /* 1224 * ok we haven't committed the transaction yet, lets do a commit
+70 -14
fs/btrfs/inode.c
··· 34 #include <linux/statfs.h> 35 #include <linux/compat.h> 36 #include <linux/bit_spinlock.h> 37 - #include <linux/version.h> 38 #include <linux/xattr.h> 39 #include <linux/posix_acl.h> 40 #include <linux/falloc.h> ··· 50 #include "tree-log.h" 51 #include "ref-cache.h" 52 #include "compression.h" 53 54 struct btrfs_iget_args { 55 u64 ino; ··· 90 struct page *locked_page, 91 u64 start, u64 end, int *page_started, 92 unsigned long *nr_written, int unlock); 93 94 /* 95 * a very lame attempt at stopping writes when the FS is 85% full. There ··· 360 nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; 361 nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); 362 363 total_compressed = actual_end - start; 364 365 /* we want to make sure that amount of ram required to uncompress ··· 517 goto again; 518 } 519 } else { 520 /* 521 * No compression, but we still need to write the pages in 522 * the file we've been given so far. redirty the locked ··· 1348 struct inode *inode, u64 file_offset, 1349 struct list_head *list) 1350 { 1351 - struct list_head *cur; 1352 struct btrfs_ordered_sum *sum; 1353 1354 btrfs_set_trans_block_group(trans, inode); 1355 - list_for_each(cur, list) { 1356 - sum = list_entry(cur, struct btrfs_ordered_sum, list); 1357 btrfs_csum_file_blocks(trans, 1358 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1359 } ··· 2036 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2037 2038 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2039 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2040 alloc_group_block, 0); 2041 btrfs_free_path(path); ··· 2063 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 2064 break; 2065 default: 2066 init_special_inode(inode, inode->i_mode, rdev); 2067 break; 2068 } ··· 2133 goto failed; 2134 } 2135 2136 leaf = path->nodes[0]; 2137 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2138 struct btrfs_inode_item); ··· 2455 ref->generation = leaf_gen; 2456 ref->nritems = 0; 2457 2458 ret = btrfs_add_leaf_ref(root, ref, 0); 2459 WARN_ON(ret); 2460 btrfs_free_leaf_ref(root, ref); ··· 2504 struct btrfs_path *path; 2505 struct btrfs_key key; 2506 struct btrfs_key found_key; 2507 - u32 found_type; 2508 struct extent_buffer *leaf; 2509 struct btrfs_file_extent_item *fi; 2510 u64 extent_start = 0; ··· 2691 if (pending_del_nr) 2692 goto del_pending; 2693 btrfs_release_path(root, path); 2694 goto search_again; 2695 } 2696 ··· 2709 BUG_ON(ret); 2710 pending_del_nr = 0; 2711 btrfs_release_path(root, path); 2712 goto search_again; 2713 } 2714 } ··· 3297 3298 /* Reached end of directory/root. Bump pos past the last item. 
*/ 3299 if (key_type == BTRFS_DIR_INDEX_KEY) 3300 - filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); 3301 else 3302 filp->f_pos++; 3303 nopos: ··· 3490 root->highest_inode = objectid; 3491 3492 inode->i_uid = current_fsuid(); 3493 - inode->i_gid = current_fsgid(); 3494 inode->i_mode = mode; 3495 inode->i_ino = objectid; 3496 inode_set_bytes(inode, 0); ··· 3625 if (IS_ERR(inode)) 3626 goto out_unlock; 3627 3628 - err = btrfs_init_acl(inode, dir); 3629 if (err) { 3630 drop_inode = 1; 3631 goto out_unlock; ··· 3688 if (IS_ERR(inode)) 3689 goto out_unlock; 3690 3691 - err = btrfs_init_acl(inode, dir); 3692 if (err) { 3693 drop_inode = 1; 3694 goto out_unlock; ··· 3811 3812 drop_on_err = 1; 3813 3814 - err = btrfs_init_acl(inode, dir); 3815 if (err) 3816 goto out_fail; 3817 ··· 4197 return -EINVAL; 4198 } 4199 4200 - static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) 4201 { 4202 - return extent_bmap(mapping, iblock, btrfs_get_extent); 4203 } 4204 4205 int btrfs_readpage(struct file *file, struct page *page) ··· 4773 if (IS_ERR(inode)) 4774 goto out_unlock; 4775 4776 - err = btrfs_init_acl(inode, dir); 4777 if (err) { 4778 drop_inode = 1; 4779 goto out_unlock; ··· 5027 .clear_bit_hook = btrfs_clear_bit_hook, 5028 }; 5029 5030 static struct address_space_operations btrfs_aops = { 5031 .readpage = btrfs_readpage, 5032 .writepage = btrfs_writepage, 5033 .writepages = btrfs_writepages, 5034 .readpages = btrfs_readpages, 5035 .sync_page = block_sync_page, 5036 - .bmap = btrfs_bmap, 5037 .direct_IO = btrfs_direct_IO, 5038 .invalidatepage = btrfs_invalidatepage, 5039 .releasepage = btrfs_releasepage, ··· 5068 .removexattr = btrfs_removexattr, 5069 .permission = btrfs_permission, 5070 .fallocate = btrfs_fallocate, 5071 }; 5072 static struct inode_operations btrfs_special_inode_operations = { 5073 .getattr = btrfs_getattr, ··· 5084 .follow_link = page_follow_link_light, 5085 .put_link = page_put_link, 5086 .permission = btrfs_permission, 5087 };
··· 34 #include <linux/statfs.h> 35 #include <linux/compat.h> 36 #include <linux/bit_spinlock.h> 37 #include <linux/xattr.h> 38 #include <linux/posix_acl.h> 39 #include <linux/falloc.h> ··· 51 #include "tree-log.h" 52 #include "ref-cache.h" 53 #include "compression.h" 54 + #include "locking.h" 55 56 struct btrfs_iget_args { 57 u64 ino; ··· 90 struct page *locked_page, 91 u64 start, u64 end, int *page_started, 92 unsigned long *nr_written, int unlock); 93 + 94 + static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 95 + { 96 + int err; 97 + 98 + err = btrfs_init_acl(inode, dir); 99 + if (!err) 100 + err = btrfs_xattr_security_init(inode, dir); 101 + return err; 102 + } 103 104 /* 105 * a very lame attempt at stopping writes when the FS is 85% full. There ··· 350 nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; 351 nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); 352 353 + /* 354 + * we don't want to send crud past the end of i_size through 355 + * compression, that's just a waste of CPU time. So, if the 356 + * end of the file is before the start of our current 357 + * requested range of bytes, we bail out to the uncompressed 358 + * cleanup code that can deal with all of this. 359 + * 360 + * It isn't really the fastest way to fix things, but this is a 361 + * very uncommon corner. 362 + */ 363 + if (actual_end <= start) 364 + goto cleanup_and_bail_uncompressed; 365 + 366 total_compressed = actual_end - start; 367 368 /* we want to make sure that amount of ram required to uncompress ··· 494 goto again; 495 } 496 } else { 497 + cleanup_and_bail_uncompressed: 498 /* 499 * No compression, but we still need to write the pages in 500 * the file we've been given so far. redirty the locked ··· 1324 struct inode *inode, u64 file_offset, 1325 struct list_head *list) 1326 { 1327 struct btrfs_ordered_sum *sum; 1328 1329 btrfs_set_trans_block_group(trans, inode); 1330 + 1331 + list_for_each_entry(sum, list, list) { 1332 btrfs_csum_file_blocks(trans, 1333 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1334 } ··· 2013 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2014 2015 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2016 + 2017 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2018 alloc_group_block, 0); 2019 btrfs_free_path(path); ··· 2039 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 2040 break; 2041 default: 2042 + inode->i_op = &btrfs_special_inode_operations; 2043 init_special_inode(inode, inode->i_mode, rdev); 2044 break; 2045 } ··· 2108 goto failed; 2109 } 2110 2111 + btrfs_unlock_up_safe(path, 1); 2112 leaf = path->nodes[0]; 2113 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2114 struct btrfs_inode_item); ··· 2429 ref->generation = leaf_gen; 2430 ref->nritems = 0; 2431 2432 + btrfs_sort_leaf_ref(ref); 2433 + 2434 ret = btrfs_add_leaf_ref(root, ref, 0); 2435 WARN_ON(ret); 2436 btrfs_free_leaf_ref(root, ref); ··· 2476 struct btrfs_path *path; 2477 struct btrfs_key key; 2478 struct btrfs_key found_key; 2479 + u32 found_type = (u8)-1; 2480 struct extent_buffer *leaf; 2481 struct btrfs_file_extent_item *fi; 2482 u64 extent_start = 0; ··· 2663 if (pending_del_nr) 2664 goto del_pending; 2665 btrfs_release_path(root, path); 2666 + if (found_type == BTRFS_INODE_ITEM_KEY) 2667 + break; 2668 goto search_again; 2669 } 2670 ··· 2679 BUG_ON(ret); 2680 pending_del_nr = 0; 2681 btrfs_release_path(root, path); 2682 + if (found_type == BTRFS_INODE_ITEM_KEY) 2683 + break; 2684 goto search_again; 
2685 } 2686 } ··· 3265 3266 /* Reached end of directory/root. Bump pos past the last item. */ 3267 if (key_type == BTRFS_DIR_INDEX_KEY) 3268 + filp->f_pos = INT_LIMIT(off_t); 3269 else 3270 filp->f_pos++; 3271 nopos: ··· 3458 root->highest_inode = objectid; 3459 3460 inode->i_uid = current_fsuid(); 3461 + 3462 + if (dir && (dir->i_mode & S_ISGID)) { 3463 + inode->i_gid = dir->i_gid; 3464 + if (S_ISDIR(mode)) 3465 + mode |= S_ISGID; 3466 + } else 3467 + inode->i_gid = current_fsgid(); 3468 + 3469 inode->i_mode = mode; 3470 inode->i_ino = objectid; 3471 inode_set_bytes(inode, 0); ··· 3586 if (IS_ERR(inode)) 3587 goto out_unlock; 3588 3589 + err = btrfs_init_inode_security(inode, dir); 3590 if (err) { 3591 drop_inode = 1; 3592 goto out_unlock; ··· 3649 if (IS_ERR(inode)) 3650 goto out_unlock; 3651 3652 + err = btrfs_init_inode_security(inode, dir); 3653 if (err) { 3654 drop_inode = 1; 3655 goto out_unlock; ··· 3772 3773 drop_on_err = 1; 3774 3775 + err = btrfs_init_inode_security(inode, dir); 3776 if (err) 3777 goto out_fail; 3778 ··· 4158 return -EINVAL; 4159 } 4160 4161 + static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4162 + __u64 start, __u64 len) 4163 { 4164 + return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 4165 } 4166 4167 int btrfs_readpage(struct file *file, struct page *page) ··· 4733 if (IS_ERR(inode)) 4734 goto out_unlock; 4735 4736 + err = btrfs_init_inode_security(inode, dir); 4737 if (err) { 4738 drop_inode = 1; 4739 goto out_unlock; ··· 4987 .clear_bit_hook = btrfs_clear_bit_hook, 4988 }; 4989 4990 + /* 4991 + * btrfs doesn't support the bmap operation because swapfiles 4992 + * use bmap to make a mapping of extents in the file. They assume 4993 + * these extents won't change over the life of the file and they 4994 + * use the bmap result to do IO directly to the drive. 4995 + * 4996 + * the btrfs bmap call would return logical addresses that aren't 4997 + * suitable for IO and they also will change frequently as COW 4998 + * operations happen. So, swapfile + btrfs == corruption. 4999 + * 5000 + * For now we're avoiding this by dropping bmap. 5001 + */ 5002 static struct address_space_operations btrfs_aops = { 5003 .readpage = btrfs_readpage, 5004 .writepage = btrfs_writepage, 5005 .writepages = btrfs_writepages, 5006 .readpages = btrfs_readpages, 5007 .sync_page = block_sync_page, 5008 .direct_IO = btrfs_direct_IO, 5009 .invalidatepage = btrfs_invalidatepage, 5010 .releasepage = btrfs_releasepage, ··· 5017 .removexattr = btrfs_removexattr, 5018 .permission = btrfs_permission, 5019 .fallocate = btrfs_fallocate, 5020 + .fiemap = btrfs_fiemap, 5021 }; 5022 static struct inode_operations btrfs_special_inode_operations = { 5023 .getattr = btrfs_getattr, ··· 5032 .follow_link = page_follow_link_light, 5033 .put_link = page_put_link, 5034 .permission = btrfs_permission, 5035 + .setxattr = btrfs_setxattr, 5036 + .getxattr = btrfs_getxattr, 5037 + .listxattr = btrfs_listxattr, 5038 + .removexattr = btrfs_removexattr, 5039 };
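The btrfs_new_inode() hunk above applies the usual setgid-directory rule: a new inode created inside a setgid directory takes the directory's group, and a new directory also inherits the S_ISGID bit itself. A minimal sketch of that rule in isolation; inherit_group_from_dir() is a hypothetical helper, not a function from the patch.

#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/cred.h>

/* hypothetical helper isolating the SGID inheritance rule */
static void inherit_group_from_dir(struct inode *inode, struct inode *dir,
				   int *mode)
{
	if (dir && (dir->i_mode & S_ISGID)) {
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(*mode))
			*mode |= S_ISGID;
	} else {
		inode->i_gid = current_fsgid();
	}
}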
-1
fs/btrfs/ioctl.c
··· 38 #include <linux/compat.h> 39 #include <linux/bit_spinlock.h> 40 #include <linux/security.h> 41 - #include <linux/version.h> 42 #include <linux/xattr.h> 43 #include <linux/vmalloc.h> 44 #include "compat.h"
··· 38 #include <linux/compat.h> 39 #include <linux/bit_spinlock.h> 40 #include <linux/security.h> 41 #include <linux/xattr.h> 42 #include <linux/vmalloc.h> 43 #include "compat.h"
+191 -19
fs/btrfs/locking.c
··· 26 #include "locking.h" 27 28 /* 29 - * locks the per buffer mutex in an extent buffer. This uses adaptive locks 30 - * and the spin is not tuned very extensively. The spinning does make a big 31 - * difference in almost every workload, but spinning for the right amount of 32 - * time needs some help. 33 - * 34 - * In general, we want to spin as long as the lock holder is doing btree 35 - * searches, and we should give up if they are in more expensive code. 36 */ 37 38 - int btrfs_tree_lock(struct extent_buffer *eb) 39 { 40 int i; 41 - 42 - if (mutex_trylock(&eb->mutex)) 43 - return 0; 44 for (i = 0; i < 512; i++) { 45 cpu_relax(); 46 - if (mutex_trylock(&eb->mutex)) 47 - return 0; 48 } 49 - cpu_relax(); 50 - mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); 51 return 0; 52 } 53 54 int btrfs_try_tree_lock(struct extent_buffer *eb) 55 { 56 - return mutex_trylock(&eb->mutex); 57 } 58 59 int btrfs_tree_unlock(struct extent_buffer *eb) 60 { 61 - mutex_unlock(&eb->mutex); 62 return 0; 63 } 64 65 int btrfs_tree_locked(struct extent_buffer *eb) 66 { 67 - return mutex_is_locked(&eb->mutex); 68 } 69 70 /* ··· 245 { 246 int i; 247 struct extent_buffer *eb; 248 for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { 249 eb = path->nodes[i]; 250 if (!eb) 251 break; 252 smp_mb(); 253 - if (!list_empty(&eb->mutex.wait_list)) 254 return 1; 255 } 256 return 0;
··· 26 #include "locking.h" 27 28 /* 29 + * btrfs_header_level() isn't free, so don't call it when lockdep isn't 30 + * on 31 */ 32 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 33 + static inline void spin_nested(struct extent_buffer *eb) 34 + { 35 + spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); 36 + } 37 + #else 38 + static inline void spin_nested(struct extent_buffer *eb) 39 + { 40 + spin_lock(&eb->lock); 41 + } 42 + #endif 43 44 + /* 45 + * Setting a lock to blocking will drop the spinlock and set the 46 + * flag that forces other procs who want the lock to wait. After 47 + * this you can safely schedule with the lock held. 48 + */ 49 + void btrfs_set_lock_blocking(struct extent_buffer *eb) 50 + { 51 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 52 + set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); 53 + spin_unlock(&eb->lock); 54 + } 55 + /* exit with the spin lock released and the bit set */ 56 + } 57 + 58 + /* 59 + * clearing the blocking flag will take the spinlock again. 60 + * After this you can't safely schedule 61 + */ 62 + void btrfs_clear_lock_blocking(struct extent_buffer *eb) 63 + { 64 + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 65 + spin_nested(eb); 66 + clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); 67 + smp_mb__after_clear_bit(); 68 + } 69 + /* exit with the spin lock held */ 70 + } 71 + 72 + /* 73 + * unfortunately, many of the places that currently set a lock to blocking 74 + * don't end up blocking for every long, and often they don't block 75 + * at all. For a dbench 50 run, if we don't spin one the blocking bit 76 + * at all, the context switch rate can jump up to 400,000/sec or more. 77 + * 78 + * So, we're still stuck with this crummy spin on the blocking bit, 79 + * at least until the most common causes of the short blocks 80 + * can be dealt with. 81 + */ 82 + static int btrfs_spin_on_block(struct extent_buffer *eb) 83 { 84 int i; 85 for (i = 0; i < 512; i++) { 86 cpu_relax(); 87 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 88 + return 1; 89 + if (need_resched()) 90 + break; 91 } 92 return 0; 93 } 94 95 + /* 96 + * This is somewhat different from trylock. It will take the 97 + * spinlock but if it finds the lock is set to blocking, it will 98 + * return without the lock held. 99 + * 100 + * returns 1 if it was able to take the lock and zero otherwise 101 + * 102 + * After this call, scheduling is not safe without first calling 103 + * btrfs_set_lock_blocking() 104 + */ 105 + int btrfs_try_spin_lock(struct extent_buffer *eb) 106 + { 107 + int i; 108 + 109 + spin_nested(eb); 110 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 111 + return 1; 112 + spin_unlock(&eb->lock); 113 + 114 + /* spin for a bit on the BLOCKING flag */ 115 + for (i = 0; i < 2; i++) { 116 + if (!btrfs_spin_on_block(eb)) 117 + break; 118 + 119 + spin_nested(eb); 120 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 121 + return 1; 122 + spin_unlock(&eb->lock); 123 + } 124 + return 0; 125 + } 126 + 127 + /* 128 + * the autoremove wake function will return 0 if it tried to wake up 129 + * a process that was already awake, which means that process won't 130 + * count as an exclusive wakeup. The waitq code will continue waking 131 + * procs until it finds one that was actually sleeping. 132 + * 133 + * For btrfs, this isn't quite what we want. We want a single proc 134 + * to be notified that the lock is ready for taking. If that proc 135 + * already happen to be awake, great, it will loop around and try for 136 + * the lock. 
137 + * 138 + * So, btrfs_wake_function always returns 1, even when the proc that we 139 + * tried to wake up was already awake. 140 + */ 141 + static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, 142 + int sync, void *key) 143 + { 144 + autoremove_wake_function(wait, mode, sync, key); 145 + return 1; 146 + } 147 + 148 + /* 149 + * returns with the extent buffer spinlocked. 150 + * 151 + * This will spin and/or wait as required to take the lock, and then 152 + * return with the spinlock held. 153 + * 154 + * After this call, scheduling is not safe without first calling 155 + * btrfs_set_lock_blocking() 156 + */ 157 + int btrfs_tree_lock(struct extent_buffer *eb) 158 + { 159 + DEFINE_WAIT(wait); 160 + wait.func = btrfs_wake_function; 161 + 162 + while(1) { 163 + spin_nested(eb); 164 + 165 + /* nobody is blocking, exit with the spinlock held */ 166 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 167 + return 0; 168 + 169 + /* 170 + * we have the spinlock, but the real owner is blocking. 171 + * wait for them 172 + */ 173 + spin_unlock(&eb->lock); 174 + 175 + /* 176 + * spin for a bit, and if the blocking flag goes away, 177 + * loop around 178 + */ 179 + if (btrfs_spin_on_block(eb)) 180 + continue; 181 + 182 + prepare_to_wait_exclusive(&eb->lock_wq, &wait, 183 + TASK_UNINTERRUPTIBLE); 184 + 185 + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 186 + schedule(); 187 + 188 + finish_wait(&eb->lock_wq, &wait); 189 + } 190 + return 0; 191 + } 192 + 193 + /* 194 + * Very quick trylock, this does not spin or schedule. It returns 195 + * 1 with the spinlock held if it was able to take the lock, or it 196 + * returns zero if it was unable to take the lock. 197 + * 198 + * After this call, scheduling is not safe without first calling 199 + * btrfs_set_lock_blocking() 200 + */ 201 int btrfs_try_tree_lock(struct extent_buffer *eb) 202 { 203 + if (spin_trylock(&eb->lock)) { 204 + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 205 + /* 206 + * we've got the spinlock, but the real owner is 207 + * blocking. Drop the spinlock and return failure 208 + */ 209 + spin_unlock(&eb->lock); 210 + return 0; 211 + } 212 + return 1; 213 + } 214 + /* someone else has the spinlock giveup */ 215 + return 0; 216 } 217 218 int btrfs_tree_unlock(struct extent_buffer *eb) 219 { 220 + /* 221 + * if we were a blocking owner, we don't have the spinlock held 222 + * just clear the bit and look for waiters 223 + */ 224 + if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 225 + smp_mb__after_clear_bit(); 226 + else 227 + spin_unlock(&eb->lock); 228 + 229 + if (waitqueue_active(&eb->lock_wq)) 230 + wake_up(&eb->lock_wq); 231 return 0; 232 } 233 234 int btrfs_tree_locked(struct extent_buffer *eb) 235 { 236 + return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || 237 + spin_is_locked(&eb->lock); 238 } 239 240 /* ··· 75 { 76 int i; 77 struct extent_buffer *eb; 78 + 79 for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { 80 eb = path->nodes[i]; 81 if (!eb) 82 break; 83 smp_mb(); 84 + if (spin_is_contended(&eb->lock) || 85 + waitqueue_active(&eb->lock_wq)) 86 return 1; 87 } 88 return 0;
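The new locking code above keeps a spinning lock for short critical sections and lets the holder switch to an explicit blocking state before it sleeps. A sketch of the intended calling discipline, assuming the declarations added to locking.h below; modify_header() and read_from_disk() are hypothetical stand-ins for whatever non-sleeping and sleeping work a caller does.

#include "extent_io.h"
#include "locking.h"

/* hypothetical caller showing the spin/blocking discipline */
static void locked_tree_work(struct extent_buffer *eb)
{
	btrfs_tree_lock(eb);		/* returns with eb->lock (spinlock) held */

	modify_header(eb);		/* short, non-sleeping work only */

	btrfs_set_lock_blocking(eb);	/* drops the spinlock, keeps the lock */
	read_from_disk(eb);		/* now safe to schedule / do IO */
	btrfs_clear_lock_blocking(eb);	/* re-takes the spinlock */

	btrfs_tree_unlock(eb);		/* handles either state */
}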
+6
fs/btrfs/locking.h
··· 22 int btrfs_tree_lock(struct extent_buffer *eb); 23 int btrfs_tree_unlock(struct extent_buffer *eb); 24 int btrfs_tree_locked(struct extent_buffer *eb); 25 int btrfs_try_tree_lock(struct extent_buffer *eb); 26 int btrfs_path_lock_waiting(struct btrfs_path *path, int level); 27 #endif
··· 22 int btrfs_tree_lock(struct extent_buffer *eb); 23 int btrfs_tree_unlock(struct extent_buffer *eb); 24 int btrfs_tree_locked(struct extent_buffer *eb); 25 + 26 int btrfs_try_tree_lock(struct extent_buffer *eb); 27 + int btrfs_try_spin_lock(struct extent_buffer *eb); 28 + 29 int btrfs_path_lock_waiting(struct btrfs_path *path, int level); 30 + 31 + void btrfs_set_lock_blocking(struct extent_buffer *eb); 32 + void btrfs_clear_lock_blocking(struct extent_buffer *eb); 33 #endif
+1 -3
fs/btrfs/ordered-data.c
··· 613 struct btrfs_sector_sum *sector_sums; 614 struct btrfs_ordered_extent *ordered; 615 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 - struct list_head *cur; 617 unsigned long num_sectors; 618 unsigned long i; 619 u32 sectorsize = BTRFS_I(inode)->root->sectorsize; ··· 623 return 1; 624 625 mutex_lock(&tree->mutex); 626 - list_for_each_prev(cur, &ordered->list) { 627 - ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); 628 if (disk_bytenr >= ordered_sum->bytenr) { 629 num_sectors = ordered_sum->len / sectorsize; 630 sector_sums = ordered_sum->sums;
··· 613 struct btrfs_sector_sum *sector_sums; 614 struct btrfs_ordered_extent *ordered; 615 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 unsigned long num_sectors; 617 unsigned long i; 618 u32 sectorsize = BTRFS_I(inode)->root->sectorsize; ··· 624 return 1; 625 626 mutex_lock(&tree->mutex); 627 + list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 628 if (disk_bytenr >= ordered_sum->bytenr) { 629 num_sectors = ordered_sum->len / sectorsize; 630 sector_sums = ordered_sum->sums;
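
The open-coded walk (a struct list_head *cur plus list_entry() on every pass) becomes list_for_each_entry_reverse(), which hides the container_of() arithmetic behind a typed iterator. The same transformation on a made-up element type, for reference:

/* illustrative element type only; the iterators come from <linux/list.h> */
struct sum_ent {
	struct list_head list;
	u64 bytenr;
};

static struct sum_ent *newest_at_or_below(struct list_head *head, u64 bytenr)
{
	struct sum_ent *ent;

	/* newest-to-oldest, the same order list_for_each_prev() gave,
	 * but without the untyped cursor variable
	 */
	list_for_each_entry_reverse(ent, head, list) {
		if (ent->bytenr <= bytenr)
			return ent;
	}
	return NULL;
}
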
+1
fs/btrfs/ref-cache.c
··· 17 */ 18 19 #include <linux/sched.h> 20 #include "ctree.h" 21 #include "ref-cache.h" 22 #include "transaction.h"
··· 17 */ 18 19 #include <linux/sched.h> 20 + #include <linux/sort.h> 21 #include "ctree.h" 22 #include "ref-cache.h" 23 #include "transaction.h"
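
The new include pulls in the declaration of lib/sort(), the kernel's in-place heapsort. For reference, its interface looks like this; the element type and comparator below are made up for illustration and are not taken from the patch:

#include <linux/sort.h>

struct ref_ent {			/* illustrative only */
	u64 bytenr;
};

static int ref_ent_cmp(const void *a, const void *b)
{
	const struct ref_ent *ra = a;
	const struct ref_ent *rb = b;

	if (ra->bytenr < rb->bytenr)
		return -1;
	if (ra->bytenr > rb->bytenr)
		return 1;
	return 0;
}

/* sort(base, number_of_elements, element_size, cmp, swap);
 * a NULL swap function selects the generic byte-wise swap
 */
sort(ents, nr, sizeof(struct ref_ent), ref_ent_cmp, NULL);
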
-1
fs/btrfs/ref-cache.h
··· 73 int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, 74 int shared); 75 int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); 76 - 77 #endif
··· 73 int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, 74 int shared); 75 int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); 76 #endif
+3 -3
fs/btrfs/super.c
··· 37 #include <linux/ctype.h> 38 #include <linux/namei.h> 39 #include <linux/miscdevice.h> 40 - #include <linux/version.h> 41 #include <linux/magic.h> 42 #include "compat.h" 43 #include "ctree.h" ··· 582 struct btrfs_ioctl_vol_args *vol; 583 struct btrfs_fs_devices *fs_devices; 584 int ret = -ENOTTY; 585 - int len; 586 587 if (!capable(CAP_SYS_ADMIN)) 588 return -EPERM; 589 590 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 591 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { 592 ret = -EFAULT; 593 goto out; 594 } 595 - len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); 596 597 switch (cmd) { 598 case BTRFS_IOC_SCAN_DEV:
··· 37 #include <linux/ctype.h> 38 #include <linux/namei.h> 39 #include <linux/miscdevice.h> 40 #include <linux/magic.h> 41 #include "compat.h" 42 #include "ctree.h" ··· 583 struct btrfs_ioctl_vol_args *vol; 584 struct btrfs_fs_devices *fs_devices; 585 int ret = -ENOTTY; 586 587 if (!capable(CAP_SYS_ADMIN)) 588 return -EPERM; 589 590 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 591 + if (!vol) 592 + return -ENOMEM; 593 + 594 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { 595 ret = -EFAULT; 596 goto out; 597 } 598 599 switch (cmd) { 600 case BTRFS_IOC_SCAN_DEV:
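
Besides dropping the stale <linux/version.h> include and the unused strnlen() result, the control-device ioctl now checks the kmalloc() return; the old code handed a possibly-NULL buffer straight to copy_from_user() on allocation failure. The fixed shape of that pattern in isolation (sketch, not the actual handler):

static long example_ioctl(void __user *arg)
{
	struct btrfs_ioctl_vol_args *vol;
	long ret = 0;

	vol = kmalloc(sizeof(*vol), GFP_KERNEL);
	if (!vol)
		return -ENOMEM;		/* bail out before touching the buffer */

	if (copy_from_user(vol, arg, sizeof(*vol))) {
		ret = -EFAULT;
		goto out;
	}

	/* ... act on vol->name ... */
out:
	kfree(vol);
	return ret;
}
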
+1 -3
fs/btrfs/transaction.c
··· 852 { 853 struct btrfs_pending_snapshot *pending; 854 struct list_head *head = &trans->transaction->pending_snapshots; 855 - struct list_head *cur; 856 int ret; 857 858 - list_for_each(cur, head) { 859 - pending = list_entry(cur, struct btrfs_pending_snapshot, list); 860 ret = create_pending_snapshot(trans, fs_info, pending); 861 BUG_ON(ret); 862 }
··· 852 { 853 struct btrfs_pending_snapshot *pending; 854 struct list_head *head = &trans->transaction->pending_snapshots; 855 int ret; 856 857 + list_for_each_entry(pending, head, list) { 858 ret = create_pending_snapshot(trans, fs_info, pending); 859 BUG_ON(ret); 860 }
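
The same cursor-to-typed-iterator cleanup as in ordered-data.c. For readers who have not met it, list_for_each_entry() is just the old open-coded loop folded into a macro; roughly (ignoring the prefetch hint the real macro adds):

/* rough expansion of list_for_each_entry(pending, head, list) */
for (pending = list_entry((head)->next, typeof(*pending), list);
     &pending->list != (head);
     pending = list_entry(pending->list.next, typeof(*pending), list)) {
	ret = create_pending_snapshot(trans, fs_info, pending);
	BUG_ON(ret);
}

Because the next element is re-read from .next on every pass, entries must not be freed inside the body; the _safe variant used in volumes.c below exists for that case.
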
+1
fs/btrfs/tree-defrag.c
··· 74 u32 nritems; 75 76 root_node = btrfs_lock_root_node(root); 77 nritems = btrfs_header_nritems(root_node); 78 root->defrag_max.objectid = 0; 79 /* from above we know this is not a leaf */
··· 74 u32 nritems; 75 76 root_node = btrfs_lock_root_node(root); 77 + btrfs_set_lock_blocking(root_node); 78 nritems = btrfs_header_nritems(root_node); 79 root->defrag_max.objectid = 0; 80 /* from above we know this is not a leaf */
+170 -184
fs/btrfs/tree-log.c
··· 78 */ 79 80 /* 81 - * btrfs_add_log_tree adds a new per-subvolume log tree into the 82 - * tree of log tree roots. This must be called with a tree log transaction 83 - * running (see start_log_trans). 84 - */ 85 - static int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 86 - struct btrfs_root *root) 87 - { 88 - struct btrfs_key key; 89 - struct btrfs_root_item root_item; 90 - struct btrfs_inode_item *inode_item; 91 - struct extent_buffer *leaf; 92 - struct btrfs_root *new_root = root; 93 - int ret; 94 - u64 objectid = root->root_key.objectid; 95 - 96 - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 97 - BTRFS_TREE_LOG_OBJECTID, 98 - trans->transid, 0, 0, 0); 99 - if (IS_ERR(leaf)) { 100 - ret = PTR_ERR(leaf); 101 - return ret; 102 - } 103 - 104 - btrfs_set_header_nritems(leaf, 0); 105 - btrfs_set_header_level(leaf, 0); 106 - btrfs_set_header_bytenr(leaf, leaf->start); 107 - btrfs_set_header_generation(leaf, trans->transid); 108 - btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); 109 - 110 - write_extent_buffer(leaf, root->fs_info->fsid, 111 - (unsigned long)btrfs_header_fsid(leaf), 112 - BTRFS_FSID_SIZE); 113 - btrfs_mark_buffer_dirty(leaf); 114 - 115 - inode_item = &root_item.inode; 116 - memset(inode_item, 0, sizeof(*inode_item)); 117 - inode_item->generation = cpu_to_le64(1); 118 - inode_item->size = cpu_to_le64(3); 119 - inode_item->nlink = cpu_to_le32(1); 120 - inode_item->nbytes = cpu_to_le64(root->leafsize); 121 - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 122 - 123 - btrfs_set_root_bytenr(&root_item, leaf->start); 124 - btrfs_set_root_generation(&root_item, trans->transid); 125 - btrfs_set_root_level(&root_item, 0); 126 - btrfs_set_root_refs(&root_item, 0); 127 - btrfs_set_root_used(&root_item, 0); 128 - 129 - memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 130 - root_item.drop_level = 0; 131 - 132 - btrfs_tree_unlock(leaf); 133 - free_extent_buffer(leaf); 134 - leaf = NULL; 135 - 136 - btrfs_set_root_dirid(&root_item, 0); 137 - 138 - key.objectid = BTRFS_TREE_LOG_OBJECTID; 139 - key.offset = objectid; 140 - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 141 - ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, 142 - &root_item); 143 - if (ret) 144 - goto fail; 145 - 146 - new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, 147 - &key); 148 - BUG_ON(!new_root); 149 - 150 - WARN_ON(root->log_root); 151 - root->log_root = new_root; 152 - 153 - /* 154 - * log trees do not get reference counted because they go away 155 - * before a real commit is actually done. They do store pointers 156 - * to file data extents, and those reference counts still get 157 - * updated (along with back refs to the log tree). 158 - */ 159 - new_root->ref_cows = 0; 160 - new_root->last_trans = trans->transid; 161 - 162 - /* 163 - * we need to make sure the root block for this new tree 164 - * is marked as dirty in the dirty_log_pages tree. This 165 - * is how it gets flushed down to disk at tree log commit time. 
166 - * 167 - * the tree logging mutex keeps others from coming in and changing 168 - * the new_root->node, so we can safely access it here 169 - */ 170 - set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start, 171 - new_root->node->start + new_root->node->len - 1, 172 - GFP_NOFS); 173 - 174 - fail: 175 - return ret; 176 - } 177 - 178 - /* 179 * start a sub transaction and setup the log tree 180 * this increments the log tree writer count to make the people 181 * syncing the tree wait for us to finish ··· 86 struct btrfs_root *root) 87 { 88 int ret; 89 mutex_lock(&root->fs_info->tree_log_mutex); 90 if (!root->fs_info->log_root_tree) { 91 ret = btrfs_init_log_root_tree(trans, root->fs_info); ··· 103 ret = btrfs_add_log_tree(trans, root); 104 BUG_ON(ret); 105 } 106 - atomic_inc(&root->fs_info->tree_log_writers); 107 - root->fs_info->tree_log_batch++; 108 mutex_unlock(&root->fs_info->tree_log_mutex); 109 return 0; 110 } 111 ··· 123 if (!root->log_root) 124 return -ENOENT; 125 126 - mutex_lock(&root->fs_info->tree_log_mutex); 127 if (root->log_root) { 128 ret = 0; 129 - atomic_inc(&root->fs_info->tree_log_writers); 130 - root->fs_info->tree_log_batch++; 131 } 132 - mutex_unlock(&root->fs_info->tree_log_mutex); 133 return ret; 134 } 135 ··· 138 */ 139 static int end_log_trans(struct btrfs_root *root) 140 { 141 - atomic_dec(&root->fs_info->tree_log_writers); 142 - smp_mb(); 143 - if (waitqueue_active(&root->fs_info->tree_log_wait)) 144 - wake_up(&root->fs_info->tree_log_wait); 145 return 0; 146 } 147 ··· 1615 1616 btrfs_tree_lock(next); 1617 clean_tree_block(trans, root, next); 1618 btrfs_wait_tree_block_writeback(next); 1619 btrfs_tree_unlock(next); 1620 ··· 1662 next = path->nodes[*level]; 1663 btrfs_tree_lock(next); 1664 clean_tree_block(trans, root, next); 1665 btrfs_wait_tree_block_writeback(next); 1666 btrfs_tree_unlock(next); 1667 ··· 1720 1721 btrfs_tree_lock(next); 1722 clean_tree_block(trans, root, next); 1723 btrfs_wait_tree_block_writeback(next); 1724 btrfs_tree_unlock(next); 1725 ··· 1793 1794 btrfs_tree_lock(next); 1795 clean_tree_block(trans, log, next); 1796 btrfs_wait_tree_block_writeback(next); 1797 btrfs_tree_unlock(next); 1798 ··· 1817 } 1818 } 1819 btrfs_free_path(path); 1820 - if (wc->free) 1821 - free_extent_buffer(log->node); 1822 return ret; 1823 } 1824 1825 - static int wait_log_commit(struct btrfs_root *log) 1826 { 1827 DEFINE_WAIT(wait); 1828 - u64 transid = log->fs_info->tree_log_transid; 1829 1830 do { 1831 - prepare_to_wait(&log->fs_info->tree_log_wait, &wait, 1832 - TASK_UNINTERRUPTIBLE); 1833 - mutex_unlock(&log->fs_info->tree_log_mutex); 1834 - if (atomic_read(&log->fs_info->tree_log_commit)) 1835 schedule(); 1836 - finish_wait(&log->fs_info->tree_log_wait, &wait); 1837 - mutex_lock(&log->fs_info->tree_log_mutex); 1838 - } while (transid == log->fs_info->tree_log_transid && 1839 - atomic_read(&log->fs_info->tree_log_commit)); 1840 return 0; 1841 } 1842 ··· 1887 int btrfs_sync_log(struct btrfs_trans_handle *trans, 1888 struct btrfs_root *root) 1889 { 1890 int ret; 1891 - unsigned long batch; 1892 struct btrfs_root *log = root->log_root; 1893 1894 - mutex_lock(&log->fs_info->tree_log_mutex); 1895 - if (atomic_read(&log->fs_info->tree_log_commit)) { 1896 - wait_log_commit(log); 1897 - goto out; 1898 } 1899 - atomic_set(&log->fs_info->tree_log_commit, 1); 1900 1901 while (1) { 1902 - batch = log->fs_info->tree_log_batch; 1903 - mutex_unlock(&log->fs_info->tree_log_mutex); 1904 schedule_timeout_uninterruptible(1); 1905 - 
mutex_lock(&log->fs_info->tree_log_mutex); 1906 - 1907 - while (atomic_read(&log->fs_info->tree_log_writers)) { 1908 - DEFINE_WAIT(wait); 1909 - prepare_to_wait(&log->fs_info->tree_log_wait, &wait, 1910 - TASK_UNINTERRUPTIBLE); 1911 - mutex_unlock(&log->fs_info->tree_log_mutex); 1912 - if (atomic_read(&log->fs_info->tree_log_writers)) 1913 - schedule(); 1914 - mutex_lock(&log->fs_info->tree_log_mutex); 1915 - finish_wait(&log->fs_info->tree_log_wait, &wait); 1916 - } 1917 - if (batch == log->fs_info->tree_log_batch) 1918 break; 1919 } 1920 1921 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 1922 BUG_ON(ret); 1923 - ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, 1924 - &root->fs_info->log_root_tree->dirty_log_pages); 1925 BUG_ON(ret); 1926 1927 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 1928 - log->fs_info->log_root_tree->node->start); 1929 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, 1930 - btrfs_header_level(log->fs_info->log_root_tree->node)); 1931 1932 - write_ctree_super(trans, log->fs_info->tree_root, 2); 1933 - log->fs_info->tree_log_transid++; 1934 - log->fs_info->tree_log_batch = 0; 1935 - atomic_set(&log->fs_info->tree_log_commit, 0); 1936 smp_mb(); 1937 - if (waitqueue_active(&log->fs_info->tree_log_wait)) 1938 - wake_up(&log->fs_info->tree_log_wait); 1939 out: 1940 - mutex_unlock(&log->fs_info->tree_log_mutex); 1941 return 0; 1942 } 1943 ··· 2030 start, end, GFP_NOFS); 2031 } 2032 2033 - log = root->log_root; 2034 - ret = btrfs_del_root(trans, root->fs_info->log_root_tree, 2035 - &log->root_key); 2036 - BUG_ON(ret); 2037 root->log_root = NULL; 2038 - kfree(root->log_root); 2039 return 0; 2040 - } 2041 - 2042 - /* 2043 - * helper function to update the item for a given subvolumes log root 2044 - * in the tree of log roots 2045 - */ 2046 - static int update_log_root(struct btrfs_trans_handle *trans, 2047 - struct btrfs_root *log) 2048 - { 2049 - u64 bytenr = btrfs_root_bytenr(&log->root_item); 2050 - int ret; 2051 - 2052 - if (log->node->start == bytenr) 2053 - return 0; 2054 - 2055 - btrfs_set_root_bytenr(&log->root_item, log->node->start); 2056 - btrfs_set_root_generation(&log->root_item, trans->transid); 2057 - btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); 2058 - ret = btrfs_update_root(trans, log->fs_info->log_root_tree, 2059 - &log->root_key, &log->root_item); 2060 - BUG_ON(ret); 2061 - return ret; 2062 } 2063 2064 /* ··· 2702 2703 btrfs_free_path(path); 2704 btrfs_free_path(dst_path); 2705 - 2706 - mutex_lock(&root->fs_info->tree_log_mutex); 2707 - ret = update_log_root(trans, log); 2708 - BUG_ON(ret); 2709 - mutex_unlock(&root->fs_info->tree_log_mutex); 2710 out: 2711 return 0; 2712 }
··· 78 */ 79 80 /* 81 * start a sub transaction and setup the log tree 82 * this increments the log tree writer count to make the people 83 * syncing the tree wait for us to finish ··· 184 struct btrfs_root *root) 185 { 186 int ret; 187 + 188 + mutex_lock(&root->log_mutex); 189 + if (root->log_root) { 190 + root->log_batch++; 191 + atomic_inc(&root->log_writers); 192 + mutex_unlock(&root->log_mutex); 193 + return 0; 194 + } 195 mutex_lock(&root->fs_info->tree_log_mutex); 196 if (!root->fs_info->log_root_tree) { 197 ret = btrfs_init_log_root_tree(trans, root->fs_info); ··· 193 ret = btrfs_add_log_tree(trans, root); 194 BUG_ON(ret); 195 } 196 mutex_unlock(&root->fs_info->tree_log_mutex); 197 + root->log_batch++; 198 + atomic_inc(&root->log_writers); 199 + mutex_unlock(&root->log_mutex); 200 return 0; 201 } 202 ··· 212 if (!root->log_root) 213 return -ENOENT; 214 215 + mutex_lock(&root->log_mutex); 216 if (root->log_root) { 217 ret = 0; 218 + atomic_inc(&root->log_writers); 219 } 220 + mutex_unlock(&root->log_mutex); 221 return ret; 222 } 223 ··· 228 */ 229 static int end_log_trans(struct btrfs_root *root) 230 { 231 + if (atomic_dec_and_test(&root->log_writers)) { 232 + smp_mb(); 233 + if (waitqueue_active(&root->log_writer_wait)) 234 + wake_up(&root->log_writer_wait); 235 + } 236 return 0; 237 } 238 ··· 1704 1705 btrfs_tree_lock(next); 1706 clean_tree_block(trans, root, next); 1707 + btrfs_set_lock_blocking(next); 1708 btrfs_wait_tree_block_writeback(next); 1709 btrfs_tree_unlock(next); 1710 ··· 1750 next = path->nodes[*level]; 1751 btrfs_tree_lock(next); 1752 clean_tree_block(trans, root, next); 1753 + btrfs_set_lock_blocking(next); 1754 btrfs_wait_tree_block_writeback(next); 1755 btrfs_tree_unlock(next); 1756 ··· 1807 1808 btrfs_tree_lock(next); 1809 clean_tree_block(trans, root, next); 1810 + btrfs_set_lock_blocking(next); 1811 btrfs_wait_tree_block_writeback(next); 1812 btrfs_tree_unlock(next); 1813 ··· 1879 1880 btrfs_tree_lock(next); 1881 clean_tree_block(trans, log, next); 1882 + btrfs_set_lock_blocking(next); 1883 btrfs_wait_tree_block_writeback(next); 1884 btrfs_tree_unlock(next); 1885 ··· 1902 } 1903 } 1904 btrfs_free_path(path); 1905 return ret; 1906 } 1907 1908 + /* 1909 + * helper function to update the item for a given subvolumes log root 1910 + * in the tree of log roots 1911 + */ 1912 + static int update_log_root(struct btrfs_trans_handle *trans, 1913 + struct btrfs_root *log) 1914 + { 1915 + int ret; 1916 + 1917 + if (log->log_transid == 1) { 1918 + /* insert root item on the first sync */ 1919 + ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, 1920 + &log->root_key, &log->root_item); 1921 + } else { 1922 + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, 1923 + &log->root_key, &log->root_item); 1924 + } 1925 + return ret; 1926 + } 1927 + 1928 + static int wait_log_commit(struct btrfs_root *root, unsigned long transid) 1929 { 1930 DEFINE_WAIT(wait); 1931 + int index = transid % 2; 1932 1933 + /* 1934 + * we only allow two pending log transactions at a time, 1935 + * so we know that if ours is more than 2 older than the 1936 + * current transaction, we're done 1937 + */ 1938 do { 1939 + prepare_to_wait(&root->log_commit_wait[index], 1940 + &wait, TASK_UNINTERRUPTIBLE); 1941 + mutex_unlock(&root->log_mutex); 1942 + if (root->log_transid < transid + 2 && 1943 + atomic_read(&root->log_commit[index])) 1944 schedule(); 1945 + finish_wait(&root->log_commit_wait[index], &wait); 1946 + mutex_lock(&root->log_mutex); 1947 + } while (root->log_transid < transid + 2 
&& 1948 + atomic_read(&root->log_commit[index])); 1949 + return 0; 1950 + } 1951 + 1952 + static int wait_for_writer(struct btrfs_root *root) 1953 + { 1954 + DEFINE_WAIT(wait); 1955 + while (atomic_read(&root->log_writers)) { 1956 + prepare_to_wait(&root->log_writer_wait, 1957 + &wait, TASK_UNINTERRUPTIBLE); 1958 + mutex_unlock(&root->log_mutex); 1959 + if (atomic_read(&root->log_writers)) 1960 + schedule(); 1961 + mutex_lock(&root->log_mutex); 1962 + finish_wait(&root->log_writer_wait, &wait); 1963 + } 1964 return 0; 1965 } 1966 ··· 1933 int btrfs_sync_log(struct btrfs_trans_handle *trans, 1934 struct btrfs_root *root) 1935 { 1936 + int index1; 1937 + int index2; 1938 int ret; 1939 struct btrfs_root *log = root->log_root; 1940 + struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1941 1942 + mutex_lock(&root->log_mutex); 1943 + index1 = root->log_transid % 2; 1944 + if (atomic_read(&root->log_commit[index1])) { 1945 + wait_log_commit(root, root->log_transid); 1946 + mutex_unlock(&root->log_mutex); 1947 + return 0; 1948 } 1949 + atomic_set(&root->log_commit[index1], 1); 1950 + 1951 + /* wait for previous tree log sync to complete */ 1952 + if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 1953 + wait_log_commit(root, root->log_transid - 1); 1954 1955 while (1) { 1956 + unsigned long batch = root->log_batch; 1957 + mutex_unlock(&root->log_mutex); 1958 schedule_timeout_uninterruptible(1); 1959 + mutex_lock(&root->log_mutex); 1960 + wait_for_writer(root); 1961 + if (batch == root->log_batch) 1962 break; 1963 } 1964 1965 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 1966 BUG_ON(ret); 1967 + 1968 + btrfs_set_root_bytenr(&log->root_item, log->node->start); 1969 + btrfs_set_root_generation(&log->root_item, trans->transid); 1970 + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); 1971 + 1972 + root->log_batch = 0; 1973 + root->log_transid++; 1974 + log->log_transid = root->log_transid; 1975 + smp_mb(); 1976 + /* 1977 + * log tree has been flushed to disk, new modifications of 1978 + * the log will be written to new positions. so it's safe to 1979 + * allow log writers to go in. 
1980 + */ 1981 + mutex_unlock(&root->log_mutex); 1982 + 1983 + mutex_lock(&log_root_tree->log_mutex); 1984 + log_root_tree->log_batch++; 1985 + atomic_inc(&log_root_tree->log_writers); 1986 + mutex_unlock(&log_root_tree->log_mutex); 1987 + 1988 + ret = update_log_root(trans, log); 1989 + BUG_ON(ret); 1990 + 1991 + mutex_lock(&log_root_tree->log_mutex); 1992 + if (atomic_dec_and_test(&log_root_tree->log_writers)) { 1993 + smp_mb(); 1994 + if (waitqueue_active(&log_root_tree->log_writer_wait)) 1995 + wake_up(&log_root_tree->log_writer_wait); 1996 + } 1997 + 1998 + index2 = log_root_tree->log_transid % 2; 1999 + if (atomic_read(&log_root_tree->log_commit[index2])) { 2000 + wait_log_commit(log_root_tree, log_root_tree->log_transid); 2001 + mutex_unlock(&log_root_tree->log_mutex); 2002 + goto out; 2003 + } 2004 + atomic_set(&log_root_tree->log_commit[index2], 1); 2005 + 2006 + if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) 2007 + wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); 2008 + 2009 + wait_for_writer(log_root_tree); 2010 + 2011 + ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2012 + &log_root_tree->dirty_log_pages); 2013 BUG_ON(ret); 2014 2015 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2016 + log_root_tree->node->start); 2017 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, 2018 + btrfs_header_level(log_root_tree->node)); 2019 2020 + log_root_tree->log_batch = 0; 2021 + log_root_tree->log_transid++; 2022 smp_mb(); 2023 + 2024 + mutex_unlock(&log_root_tree->log_mutex); 2025 + 2026 + /* 2027 + * nobody else is going to jump in and write the the ctree 2028 + * super here because the log_commit atomic below is protecting 2029 + * us. We must be called with a transaction handle pinning 2030 + * the running transaction open, so a full commit can't hop 2031 + * in and cause problems either. 2032 + */ 2033 + write_ctree_super(trans, root->fs_info->tree_root, 2); 2034 + 2035 + atomic_set(&log_root_tree->log_commit[index2], 0); 2036 + smp_mb(); 2037 + if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) 2038 + wake_up(&log_root_tree->log_commit_wait[index2]); 2039 out: 2040 + atomic_set(&root->log_commit[index1], 0); 2041 + smp_mb(); 2042 + if (waitqueue_active(&root->log_commit_wait[index1])) 2043 + wake_up(&root->log_commit_wait[index1]); 2044 return 0; 2045 } 2046 ··· 2019 start, end, GFP_NOFS); 2020 } 2021 2022 + if (log->log_transid > 0) { 2023 + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, 2024 + &log->root_key); 2025 + BUG_ON(ret); 2026 + } 2027 root->log_root = NULL; 2028 + free_extent_buffer(log->node); 2029 + kfree(log); 2030 return 0; 2031 } 2032 2033 /* ··· 2711 2712 btrfs_free_path(path); 2713 btrfs_free_path(dst_path); 2714 out: 2715 return 0; 2716 }
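
The tree-log rework replaces the single fs_info-wide tree_log_mutex/tree_log_writers/tree_log_batch state with per-root fields (root->log_mutex, log_writers, log_batch) plus a two-slot commit scheme: log_transid only ever grows, the slot is log_transid % 2, and at most two log transactions are in flight, so one can be flushed to disk while the next is already collecting changes. update_log_root also now inserts the log root item lazily on the first sync instead of when the log tree is created. Condensed from btrfs_sync_log() above, the slot lifecycle looks like this (sketch; assumes root->log_mutex is held):

int slot = root->log_transid % 2;

if (atomic_read(&root->log_commit[slot])) {
	/* this slot is already being committed -- just wait for it */
	wait_log_commit(root, root->log_transid);
	return 0;
}
atomic_set(&root->log_commit[slot], 1);

/* the previous log transaction may still be flushing; let it finish */
if (atomic_read(&root->log_commit[(slot + 1) % 2]))
	wait_log_commit(root, root->log_transid - 1);

/* ... wait for writers, flush dirty log pages, bump log_transid ... */

atomic_set(&root->log_commit[slot], 0);
smp_mb();
if (waitqueue_active(&root->log_commit_wait[slot]))
	wake_up(&root->log_commit_wait[slot]);
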
+19 -30
fs/btrfs/volumes.c
··· 20 #include <linux/buffer_head.h> 21 #include <linux/blkdev.h> 22 #include <linux/random.h> 23 - #include <linux/version.h> 24 #include <asm/div64.h> 25 #include "compat.h" 26 #include "ctree.h" ··· 103 u64 devid, u8 *uuid) 104 { 105 struct btrfs_device *dev; 106 - struct list_head *cur; 107 108 - list_for_each(cur, head) { 109 - dev = list_entry(cur, struct btrfs_device, dev_list); 110 if (dev->devid == devid && 111 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { 112 return dev; ··· 115 116 static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) 117 { 118 - struct list_head *cur; 119 struct btrfs_fs_devices *fs_devices; 120 121 - list_for_each(cur, &fs_uuids) { 122 - fs_devices = list_entry(cur, struct btrfs_fs_devices, list); 123 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) 124 return fs_devices; 125 } ··· 154 loop: 155 spin_lock(&device->io_lock); 156 157 /* take all the bios off the list at once and process them 158 * later on (without the lock held). But, remember the 159 * tail and other pointers so the bios can be properly reinserted ··· 204 * is now congested. Back off and let other work structs 205 * run instead 206 */ 207 - if (pending && bdi_write_congested(bdi) && 208 fs_info->fs_devices->open_devices > 1) { 209 struct bio *old_head; 210 ··· 216 tail->bi_next = old_head; 217 else 218 device->pending_bio_tail = tail; 219 - device->running_pending = 0; 220 221 spin_unlock(&device->io_lock); 222 btrfs_requeue_work(&device->work); ··· 226 } 227 if (again) 228 goto loop; 229 done: 230 return 0; 231 } ··· 347 348 int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) 349 { 350 - struct list_head *tmp; 351 - struct list_head *cur; 352 - struct btrfs_device *device; 353 354 mutex_lock(&uuid_mutex); 355 again: 356 - list_for_each_safe(cur, tmp, &fs_devices->devices) { 357 - device = list_entry(cur, struct btrfs_device, dev_list); 358 if (device->in_fs_metadata) 359 continue; 360 ··· 382 383 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) 384 { 385 - struct list_head *cur; 386 struct btrfs_device *device; 387 388 if (--fs_devices->opened > 0) 389 return 0; 390 391 - list_for_each(cur, &fs_devices->devices) { 392 - device = list_entry(cur, struct btrfs_device, dev_list); 393 if (device->bdev) { 394 close_bdev_exclusive(device->bdev, device->mode); 395 fs_devices->open_devices--; ··· 436 { 437 struct block_device *bdev; 438 struct list_head *head = &fs_devices->devices; 439 - struct list_head *cur; 440 struct btrfs_device *device; 441 struct block_device *latest_bdev = NULL; 442 struct buffer_head *bh; ··· 446 int seeding = 1; 447 int ret = 0; 448 449 - list_for_each(cur, head) { 450 - device = list_entry(cur, struct btrfs_device, dev_list); 451 if (device->bdev) 452 continue; 453 if (!device->name) ··· 573 *(unsigned long long *)disk_super->fsid, 574 *(unsigned long long *)(disk_super->fsid + 8)); 575 } 576 - printk(KERN_INFO "devid %llu transid %llu %s\n", 577 (unsigned long long)devid, (unsigned long long)transid, path); 578 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 579 ··· 1012 } 1013 1014 if (strcmp(device_path, "missing") == 0) { 1015 - struct list_head *cur; 1016 struct list_head *devices; 1017 struct btrfs_device *tmp; 1018 1019 device = NULL; 1020 devices = &root->fs_info->fs_devices->devices; 1021 - list_for_each(cur, devices) { 1022 - tmp = list_entry(cur, struct btrfs_device, dev_list); 1023 if (tmp->in_fs_metadata && !tmp->bdev) { 1024 device = tmp; 1025 break; ··· 1273 struct 
btrfs_trans_handle *trans; 1274 struct btrfs_device *device; 1275 struct block_device *bdev; 1276 - struct list_head *cur; 1277 struct list_head *devices; 1278 struct super_block *sb = root->fs_info->sb; 1279 u64 total_bytes; ··· 1296 mutex_lock(&root->fs_info->volume_mutex); 1297 1298 devices = &root->fs_info->fs_devices->devices; 1299 - list_for_each(cur, devices) { 1300 - device = list_entry(cur, struct btrfs_device, dev_list); 1301 if (device->bdev == bdev) { 1302 ret = -EEXIST; 1303 goto error; ··· 1695 int btrfs_balance(struct btrfs_root *dev_root) 1696 { 1697 int ret; 1698 - struct list_head *cur; 1699 struct list_head *devices = &dev_root->fs_info->fs_devices->devices; 1700 struct btrfs_device *device; 1701 u64 old_size; ··· 1713 dev_root = dev_root->fs_info->dev_root; 1714 1715 /* step one make some room on all the devices */ 1716 - list_for_each(cur, devices) { 1717 - device = list_entry(cur, struct btrfs_device, dev_list); 1718 old_size = device->total_bytes; 1719 size_to_free = div_factor(old_size, 1); 1720 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
··· 20 #include <linux/buffer_head.h> 21 #include <linux/blkdev.h> 22 #include <linux/random.h> 23 #include <asm/div64.h> 24 #include "compat.h" 25 #include "ctree.h" ··· 104 u64 devid, u8 *uuid) 105 { 106 struct btrfs_device *dev; 107 108 + list_for_each_entry(dev, head, dev_list) { 109 if (dev->devid == devid && 110 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { 111 return dev; ··· 118 119 static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) 120 { 121 struct btrfs_fs_devices *fs_devices; 122 123 + list_for_each_entry(fs_devices, &fs_uuids, list) { 124 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) 125 return fs_devices; 126 } ··· 159 loop: 160 spin_lock(&device->io_lock); 161 162 + loop_lock: 163 /* take all the bios off the list at once and process them 164 * later on (without the lock held). But, remember the 165 * tail and other pointers so the bios can be properly reinserted ··· 208 * is now congested. Back off and let other work structs 209 * run instead 210 */ 211 + if (pending && bdi_write_congested(bdi) && num_run > 16 && 212 fs_info->fs_devices->open_devices > 1) { 213 struct bio *old_head; 214 ··· 220 tail->bi_next = old_head; 221 else 222 device->pending_bio_tail = tail; 223 + 224 + device->running_pending = 1; 225 226 spin_unlock(&device->io_lock); 227 btrfs_requeue_work(&device->work); ··· 229 } 230 if (again) 231 goto loop; 232 + 233 + spin_lock(&device->io_lock); 234 + if (device->pending_bios) 235 + goto loop_lock; 236 + spin_unlock(&device->io_lock); 237 done: 238 return 0; 239 } ··· 345 346 int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) 347 { 348 + struct btrfs_device *device, *next; 349 350 mutex_lock(&uuid_mutex); 351 again: 352 + list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 353 if (device->in_fs_metadata) 354 continue; 355 ··· 383 384 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) 385 { 386 struct btrfs_device *device; 387 388 if (--fs_devices->opened > 0) 389 return 0; 390 391 + list_for_each_entry(device, &fs_devices->devices, dev_list) { 392 if (device->bdev) { 393 close_bdev_exclusive(device->bdev, device->mode); 394 fs_devices->open_devices--; ··· 439 { 440 struct block_device *bdev; 441 struct list_head *head = &fs_devices->devices; 442 struct btrfs_device *device; 443 struct block_device *latest_bdev = NULL; 444 struct buffer_head *bh; ··· 450 int seeding = 1; 451 int ret = 0; 452 453 + list_for_each_entry(device, head, dev_list) { 454 if (device->bdev) 455 continue; 456 if (!device->name) ··· 578 *(unsigned long long *)disk_super->fsid, 579 *(unsigned long long *)(disk_super->fsid + 8)); 580 } 581 + printk(KERN_CONT "devid %llu transid %llu %s\n", 582 (unsigned long long)devid, (unsigned long long)transid, path); 583 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 584 ··· 1017 } 1018 1019 if (strcmp(device_path, "missing") == 0) { 1020 struct list_head *devices; 1021 struct btrfs_device *tmp; 1022 1023 device = NULL; 1024 devices = &root->fs_info->fs_devices->devices; 1025 + list_for_each_entry(tmp, devices, dev_list) { 1026 if (tmp->in_fs_metadata && !tmp->bdev) { 1027 device = tmp; 1028 break; ··· 1280 struct btrfs_trans_handle *trans; 1281 struct btrfs_device *device; 1282 struct block_device *bdev; 1283 struct list_head *devices; 1284 struct super_block *sb = root->fs_info->sb; 1285 u64 total_bytes; ··· 1304 mutex_lock(&root->fs_info->volume_mutex); 1305 1306 devices = &root->fs_info->fs_devices->devices; 1307 + list_for_each_entry(device, 
devices, dev_list) { 1308 if (device->bdev == bdev) { 1309 ret = -EEXIST; 1310 goto error; ··· 1704 int btrfs_balance(struct btrfs_root *dev_root) 1705 { 1706 int ret; 1707 struct list_head *devices = &dev_root->fs_info->fs_devices->devices; 1708 struct btrfs_device *device; 1709 u64 old_size; ··· 1723 dev_root = dev_root->fs_info->dev_root; 1724 1725 /* step one make some room on all the devices */ 1726 + list_for_each_entry(device, devices, dev_list) { 1727 old_size = device->total_bytes; 1728 size_to_free = div_factor(old_size, 1); 1729 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
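
Besides the list_for_each_entry() conversions, run_scheduled_bios() now re-checks device->pending_bios under io_lock before returning (the new loop_lock label), so bios queued while the lock was dropped are not left behind, and it only backs off for congestion once more than 16 bios have run in the current pass. btrfs_close_extra_devices() switches to list_for_each_entry_safe() because it frees devices while walking the list; the _safe variant keeps a lookahead pointer so that is legal. Illustrative shape of that idiom (sketch, not the patched function):

static void drop_stale_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *next;

	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (device->in_fs_metadata)
			continue;
		/* 'next' was sampled before the unlink, so the walk survives */
		list_del_init(&device->dev_list);
		kfree(device->name);
		kfree(device);
	}
}
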
+45 -3
fs/btrfs/xattr.c
··· 21 #include <linux/slab.h> 22 #include <linux/rwsem.h> 23 #include <linux/xattr.h> 24 #include "ctree.h" 25 #include "btrfs_inode.h" 26 #include "transaction.h" ··· 46 /* lookup the xattr by name */ 47 di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, 48 strlen(name), 0); 49 - if (!di || IS_ERR(di)) { 50 ret = -ENODATA; 51 goto out; 52 } 53 ··· 66 ret = -ERANGE; 67 goto out; 68 } 69 data_ptr = (unsigned long)((char *)(di + 1) + 70 btrfs_dir_name_len(leaf, di)); 71 read_extent_buffer(leaf, buffer, data_ptr, ··· 98 if (!path) 99 return -ENOMEM; 100 101 - trans = btrfs_start_transaction(root, 1); 102 btrfs_set_trans_block_group(trans, inode); 103 104 /* first lets see if we already have this xattr */ ··· 188 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 189 if (ret < 0) 190 goto err; 191 - ret = 0; 192 advance = 0; 193 while (1) { 194 leaf = path->nodes[0]; ··· 330 if (!btrfs_is_valid_xattr(name)) 331 return -EOPNOTSUPP; 332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 333 }
··· 21 #include <linux/slab.h> 22 #include <linux/rwsem.h> 23 #include <linux/xattr.h> 24 + #include <linux/security.h> 25 #include "ctree.h" 26 #include "btrfs_inode.h" 27 #include "transaction.h" ··· 45 /* lookup the xattr by name */ 46 di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, 47 strlen(name), 0); 48 + if (!di) { 49 ret = -ENODATA; 50 + goto out; 51 + } else if (IS_ERR(di)) { 52 + ret = PTR_ERR(di); 53 goto out; 54 } 55 ··· 62 ret = -ERANGE; 63 goto out; 64 } 65 + 66 + /* 67 + * The way things are packed into the leaf is like this 68 + * |struct btrfs_dir_item|name|data| 69 + * where name is the xattr name, so security.foo, and data is the 70 + * content of the xattr. data_ptr points to the location in memory 71 + * where the data starts in the in memory leaf 72 + */ 73 data_ptr = (unsigned long)((char *)(di + 1) + 74 btrfs_dir_name_len(leaf, di)); 75 read_extent_buffer(leaf, buffer, data_ptr, ··· 86 if (!path) 87 return -ENOMEM; 88 89 + trans = btrfs_join_transaction(root, 1); 90 btrfs_set_trans_block_group(trans, inode); 91 92 /* first lets see if we already have this xattr */ ··· 176 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 177 if (ret < 0) 178 goto err; 179 advance = 0; 180 while (1) { 181 leaf = path->nodes[0]; ··· 319 if (!btrfs_is_valid_xattr(name)) 320 return -EOPNOTSUPP; 321 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 322 + } 323 + 324 + int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 325 + { 326 + int err; 327 + size_t len; 328 + void *value; 329 + char *suffix; 330 + char *name; 331 + 332 + err = security_inode_init_security(inode, dir, &suffix, &value, &len); 333 + if (err) { 334 + if (err == -EOPNOTSUPP) 335 + return 0; 336 + return err; 337 + } 338 + 339 + name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1, 340 + GFP_NOFS); 341 + if (!name) { 342 + err = -ENOMEM; 343 + } else { 344 + strcpy(name, XATTR_SECURITY_PREFIX); 345 + strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 346 + err = __btrfs_setxattr(inode, name, value, len, 0); 347 + kfree(name); 348 + } 349 + 350 + kfree(suffix); 351 + kfree(value); 352 + return err; 353 }
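
The new btrfs_xattr_security_init() wires btrfs into LSM labeling: security_inode_init_security() returns the security xattr suffix and value for a freshly created inode, and the helper stores them through __btrfs_setxattr() under the "security." prefix, returning 0 when the LSM has nothing to add. __btrfs_getxattr also now distinguishes a missing xattr (-ENODATA) from a lookup error, and __btrfs_setxattr joins the running transaction instead of starting one, presumably so it can be called from inside the inode-creation transaction. The intended caller is the inode-creation path, which is not part of this hunk; roughly (sketch only, hypothetical function):

static int example_create_hook(struct inode *dir, struct inode *inode)
{
	int err;

	/* label the new inode before it becomes visible */
	err = btrfs_xattr_security_init(inode, dir);
	if (err)
		return err;	/* -EOPNOTSUPP is already filtered to 0 */

	/* ... add the directory entry, bump counts, etc ... */
	return 0;
}
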
+2
fs/btrfs/xattr.h
··· 36 const void *value, size_t size, int flags); 37 extern int btrfs_removexattr(struct dentry *dentry, const char *name); 38 39 #endif /* __XATTR__ */
··· 36 const void *value, size_t size, int flags); 37 extern int btrfs_removexattr(struct dentry *dentry, const char *name); 38 39 + extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 40 + 41 #endif /* __XATTR__ */