Btrfs: Change btree locking to use explicit blocking points

Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.

So far, btrfs has been using a mutex along with a trylock loop;
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.

This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.

We'll be able to get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.

The basic idea is:

btrfs_tree_lock() returns with the spin lock held

btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.

If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.

A good percentage of the time, the code that needs to set the blocking bit
finishes without actually blocking. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
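
For reference, the spin closely follows the btrfs_spin_on_block() helper
added in locking.c below; a condensed sketch of the idea:

    static int spin_on_blocking_bit(struct extent_buffer *eb)
    {
            int i;

            for (i = 0; i < 512; i++) {
                    cpu_relax();
                    /* owner cleared the blocking bit, retry the spinlock */
                    if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                            return 1;
                    /* stop burning the CPU if someone else needs it */
                    if (need_resched())
                            break;
            }
            /* still blocking; the caller falls back to the wait queue */
            return 0;
    }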

btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.

btrfs_tree_unlock() can be called on either blocking or spinning locks;
it does the right thing based on the blocking bit.

ctree.c has helper functions to set or clear all the locked buffers in a
path as blocking.
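
To illustrate the intended usage, a hypothetical caller looks roughly
like this (the locking helpers are the ones added by this patch;
do_cheap_update() and cow_block_that_may_schedule() are placeholders,
not real functions):

    static void example_update(struct btrfs_root *root,
                               struct extent_buffer *eb)
    {
            /* returns with the spinning lock held */
            btrfs_tree_lock(eb);

            /* cheap metadata work stays under the spinlock */
            do_cheap_update(eb);

            /*
             * about to do something that can schedule: keep the
             * logical lock but drop the spinlock
             */
            btrfs_set_lock_blocking(eb);
            cow_block_that_may_schedule(root, eb);

            /* back to a spinning lock */
            btrfs_clear_lock_blocking(eb);

            /* works whether the lock is blocking or spinning */
            btrfs_tree_unlock(eb);
    }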

Signed-off-by: Chris Mason <chris.mason@oracle.com>

+471 -40
+226 -8
fs/btrfs/ctree.c
··· 54 return path; 55 } 56 57 /* this also releases the path */ 58 void btrfs_free_path(struct btrfs_path *p) 59 { ··· 297 if (IS_ERR(cow)) 298 return PTR_ERR(cow); 299 300 copy_extent_buffer(cow, buf, 0, 0, cow->len); 301 btrfs_set_header_bytenr(cow, cow->start); 302 btrfs_set_header_generation(cow, trans->transid); ··· 424 } 425 426 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 427 ret = __btrfs_cow_block(trans, root, buf, parent, 428 parent_slot, cow_ret, search_start, 0, 429 prealloc_dest); ··· 534 if (parent_nritems == 1) 535 return 0; 536 537 for (i = start_slot; i < end_slot; i++) { 538 int close = 1; 539 ··· 596 search_start = last_block; 597 598 btrfs_tree_lock(cur); 599 err = __btrfs_cow_block(trans, root, cur, parent, i, 600 &cur, search_start, 601 min(16 * blocksize, ··· 895 return 0; 896 897 mid = path->nodes[level]; 898 WARN_ON(!path->locks[level]); 899 WARN_ON(btrfs_header_generation(mid) != trans->transid); 900 ··· 918 /* promote the child to a root */ 919 child = read_node_slot(root, mid, 0); 920 btrfs_tree_lock(child); 921 BUG_ON(!child); 922 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); 923 BUG_ON(ret); ··· 935 936 add_root_to_dirty_list(root); 937 btrfs_tree_unlock(child); 938 path->locks[level] = 0; 939 path->nodes[level] = NULL; 940 clean_tree_block(trans, root, mid); ··· 960 left = read_node_slot(root, parent, pslot - 1); 961 if (left) { 962 btrfs_tree_lock(left); 963 wret = btrfs_cow_block(trans, root, left, 964 parent, pslot - 1, &left, 0); 965 if (wret) { ··· 971 right = read_node_slot(root, parent, pslot + 1); 972 if (right) { 973 btrfs_tree_lock(right); 974 wret = btrfs_cow_block(trans, root, right, 975 parent, pslot + 1, &right, 0); 976 if (wret) { ··· 1147 u32 left_nr; 1148 1149 btrfs_tree_lock(left); 1150 left_nr = btrfs_header_nritems(left); 1151 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1152 wret = 1; ··· 1195 */ 1196 if (right) { 1197 u32 right_nr; 1198 btrfs_tree_lock(right); 1199 right_nr = btrfs_header_nritems(right); 1200 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1201 wret = 1; ··· 1310 } 1311 1312 /* 1313 * when we walk down the tree, it is usually safe to unlock the higher layers 1314 * in the tree. The exceptions are when our path goes through slot 0, because 1315 * operations on the tree might require changing key pointers higher up in the ··· 1418 btrfs_tree_unlock(t); 1419 path->locks[i] = 0; 1420 } 1421 } 1422 } 1423 ··· 1518 */ 1519 if (prealloc_block.objectid && 1520 prealloc_block.offset != b->len) { 1521 btrfs_free_reserved_extent(root, 1522 prealloc_block.objectid, 1523 prealloc_block.offset); ··· 1543 goto again; 1544 } 1545 1546 wret = btrfs_cow_block(trans, root, b, 1547 p->nodes[level + 1], 1548 p->slots[level + 1], ··· 1566 if (!p->skip_locking) 1567 p->locks[level] = 1; 1568 1569 ret = check_block(root, p, level); 1570 if (ret) { 1571 ret = -1; ··· 1589 } 1590 1591 ret = bin_search(b, key, level, &slot); 1592 if (level != 0) { 1593 if (ret && slot > 0) 1594 slot -= 1; ··· 1597 if ((p->search_for_split || ins_len > 0) && 1598 btrfs_header_nritems(b) >= 1599 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1600 - int sret = split_node(trans, root, p, level); 1601 BUG_ON(sret > 0); 1602 if (sret) { 1603 ret = sret; ··· 1615 b = p->nodes[level]; 1616 slot = p->slots[level]; 1617 } else if (ins_len < 0) { 1618 - int sret = balance_level(trans, root, p, 1619 - level); 1620 if (sret) { 1621 ret = sret; 1622 goto done; ··· 1658 * of the btree by dropping locks before 1659 * we read. 
1660 */ 1661 - if (level > 1) { 1662 btrfs_release_path(NULL, p); 1663 if (tmp) 1664 free_extent_buffer(tmp); ··· 1673 free_extent_buffer(tmp); 1674 goto again; 1675 } else { 1676 if (tmp) 1677 free_extent_buffer(tmp); 1678 if (should_reada) ··· 1683 b = read_node_slot(root, b, slot); 1684 } 1685 } 1686 - if (!p->skip_locking) 1687 - btrfs_tree_lock(b); 1688 } else { 1689 p->slots[level] = slot; 1690 if (ins_len > 0 && 1691 btrfs_leaf_free_space(root, b) < ins_len) { 1692 - int sret = split_leaf(trans, root, key, 1693 p, ins_len, ret == 0); 1694 BUG_ON(sret > 0); 1695 if (sret) { 1696 ret = sret; ··· 1719 } 1720 ret = 1; 1721 done: 1722 if (prealloc_block.objectid) { 1723 btrfs_free_reserved_extent(root, 1724 prealloc_block.objectid, 1725 prealloc_block.offset); 1726 } 1727 - 1728 return ret; 1729 } 1730 ··· 1751 eb = btrfs_lock_root_node(root); 1752 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); 1753 BUG_ON(ret); 1754 1755 parent = eb; 1756 while (1) { ··· 1778 eb = read_tree_block(root, bytenr, blocksize, 1779 generation); 1780 btrfs_tree_lock(eb); 1781 } 1782 1783 /* ··· 1803 eb = read_tree_block(root, bytenr, blocksize, 1804 generation); 1805 btrfs_tree_lock(eb); 1806 } 1807 1808 ret = btrfs_cow_block(trans, root, eb, parent, slot, ··· 2350 2351 right = read_node_slot(root, upper, slot + 1); 2352 btrfs_tree_lock(right); 2353 free_space = btrfs_leaf_free_space(root, right); 2354 if (free_space < data_size) 2355 goto out_unlock; ··· 2547 2548 left = read_node_slot(root, path->nodes[1], slot - 1); 2549 btrfs_tree_lock(left); 2550 free_space = btrfs_leaf_free_space(root, left); 2551 if (free_space < data_size) { 2552 ret = 1; ··· 3006 sizeof(struct btrfs_item), 1); 3007 path->keep_locks = 0; 3008 BUG_ON(ret); 3009 3010 leaf = path->nodes[0]; 3011 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); ··· 3542 BUG(); 3543 } 3544 out: 3545 return ret; 3546 } 3547 ··· 3910 */ 3911 if (slot >= nritems) { 3912 path->slots[level] = slot; 3913 sret = btrfs_find_next_key(root, path, min_key, level, 3914 cache_only, min_trans); 3915 if (sret == 0) { 3916 btrfs_release_path(root, path); 3917 goto again; 3918 } else { 3919 goto out; 3920 } 3921 } ··· 3929 unlock_up(path, level, 1); 3930 goto out; 3931 } 3932 cur = read_node_slot(root, cur, slot); 3933 3934 btrfs_tree_lock(cur); 3935 path->locks[level - 1] = 1; 3936 path->nodes[level - 1] = cur; 3937 unlock_up(path, level, 1); 3938 } 3939 out: 3940 if (ret == 0) 3941 memcpy(min_key, &found_key, sizeof(found_key)); 3942 return ret; 3943 } 3944 ··· 4038 if (ret < 0) 4039 return ret; 4040 4041 nritems = btrfs_header_nritems(path->nodes[0]); 4042 /* 4043 * by releasing the path above we dropped all our locks. A balance ··· 4069 free_extent_buffer(next); 4070 } 4071 4072 if (level == 1 && (path->locks[1] || path->skip_locking) && 4073 path->reada) 4074 reada_for_search(root, path, level, slot, 0); ··· 4078 if (!path->skip_locking) { 4079 WARN_ON(!btrfs_tree_locked(c)); 4080 btrfs_tree_lock(next); 4081 } 4082 break; 4083 } ··· 4095 path->locks[level] = 1; 4096 if (!level) 4097 break; 4098 if (level == 1 && path->locks[1] && path->reada) 4099 reada_for_search(root, path, level, slot, 0); 4100 next = read_node_slot(root, next, 0); 4101 if (!path->skip_locking) { 4102 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 4103 btrfs_tree_lock(next); 4104 } 4105 } 4106 done: ··· 4128 4129 while (1) { 4130 if (path->slots[0] == 0) { 4131 ret = btrfs_prev_leaf(root, path); 4132 if (ret != 0) 4133 return ret;
··· 54 return path; 55 } 56 57 + /* 58 + * set all locked nodes in the path to blocking locks. This should 59 + * be done before scheduling 60 + */ 61 + noinline void btrfs_set_path_blocking(struct btrfs_path *p) 62 + { 63 + int i; 64 + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 65 + if (p->nodes[i] && p->locks[i]) 66 + btrfs_set_lock_blocking(p->nodes[i]); 67 + } 68 + } 69 + 70 + /* 71 + * reset all the locked nodes in the patch to spinning locks. 72 + */ 73 + noinline void btrfs_clear_path_blocking(struct btrfs_path *p) 74 + { 75 + int i; 76 + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 77 + if (p->nodes[i] && p->locks[i]) 78 + btrfs_clear_lock_blocking(p->nodes[i]); 79 + } 80 + } 81 + 82 /* this also releases the path */ 83 void btrfs_free_path(struct btrfs_path *p) 84 { ··· 272 if (IS_ERR(cow)) 273 return PTR_ERR(cow); 274 275 + /* cow is set to blocking by btrfs_init_new_buffer */ 276 + 277 copy_extent_buffer(cow, buf, 0, 0, cow->len); 278 btrfs_set_header_bytenr(cow, cow->start); 279 btrfs_set_header_generation(cow, trans->transid); ··· 397 } 398 399 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 400 + 401 + if (parent) 402 + btrfs_set_lock_blocking(parent); 403 + btrfs_set_lock_blocking(buf); 404 + 405 ret = __btrfs_cow_block(trans, root, buf, parent, 406 parent_slot, cow_ret, search_start, 0, 407 prealloc_dest); ··· 502 if (parent_nritems == 1) 503 return 0; 504 505 + btrfs_set_lock_blocking(parent); 506 + 507 for (i = start_slot; i < end_slot; i++) { 508 int close = 1; 509 ··· 562 search_start = last_block; 563 564 btrfs_tree_lock(cur); 565 + btrfs_set_lock_blocking(cur); 566 err = __btrfs_cow_block(trans, root, cur, parent, i, 567 &cur, search_start, 568 min(16 * blocksize, ··· 860 return 0; 861 862 mid = path->nodes[level]; 863 + 864 WARN_ON(!path->locks[level]); 865 WARN_ON(btrfs_header_generation(mid) != trans->transid); 866 ··· 882 /* promote the child to a root */ 883 child = read_node_slot(root, mid, 0); 884 btrfs_tree_lock(child); 885 + btrfs_set_lock_blocking(child); 886 BUG_ON(!child); 887 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); 888 BUG_ON(ret); ··· 898 899 add_root_to_dirty_list(root); 900 btrfs_tree_unlock(child); 901 + 902 path->locks[level] = 0; 903 path->nodes[level] = NULL; 904 clean_tree_block(trans, root, mid); ··· 922 left = read_node_slot(root, parent, pslot - 1); 923 if (left) { 924 btrfs_tree_lock(left); 925 + btrfs_set_lock_blocking(left); 926 wret = btrfs_cow_block(trans, root, left, 927 parent, pslot - 1, &left, 0); 928 if (wret) { ··· 932 right = read_node_slot(root, parent, pslot + 1); 933 if (right) { 934 btrfs_tree_lock(right); 935 + btrfs_set_lock_blocking(right); 936 wret = btrfs_cow_block(trans, root, right, 937 parent, pslot + 1, &right, 0); 938 if (wret) { ··· 1107 u32 left_nr; 1108 1109 btrfs_tree_lock(left); 1110 + btrfs_set_lock_blocking(left); 1111 + 1112 left_nr = btrfs_header_nritems(left); 1113 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1114 wret = 1; ··· 1153 */ 1154 if (right) { 1155 u32 right_nr; 1156 + 1157 btrfs_tree_lock(right); 1158 + btrfs_set_lock_blocking(right); 1159 + 1160 right_nr = btrfs_header_nritems(right); 1161 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1162 wret = 1; ··· 1265 } 1266 1267 /* 1268 + * returns -EAGAIN if it had to drop the path, or zero if everything was in 1269 + * cache 1270 + */ 1271 + static noinline int reada_for_balance(struct btrfs_root *root, 1272 + struct btrfs_path *path, int level) 1273 + { 1274 + int slot; 1275 + int nritems; 1276 + struct 
extent_buffer *parent; 1277 + struct extent_buffer *eb; 1278 + u64 gen; 1279 + u64 block1 = 0; 1280 + u64 block2 = 0; 1281 + int ret = 0; 1282 + int blocksize; 1283 + 1284 + parent = path->nodes[level - 1]; 1285 + if (!parent) 1286 + return 0; 1287 + 1288 + nritems = btrfs_header_nritems(parent); 1289 + slot = path->slots[level]; 1290 + blocksize = btrfs_level_size(root, level); 1291 + 1292 + if (slot > 0) { 1293 + block1 = btrfs_node_blockptr(parent, slot - 1); 1294 + gen = btrfs_node_ptr_generation(parent, slot - 1); 1295 + eb = btrfs_find_tree_block(root, block1, blocksize); 1296 + if (eb && btrfs_buffer_uptodate(eb, gen)) 1297 + block1 = 0; 1298 + free_extent_buffer(eb); 1299 + } 1300 + if (slot < nritems) { 1301 + block2 = btrfs_node_blockptr(parent, slot + 1); 1302 + gen = btrfs_node_ptr_generation(parent, slot + 1); 1303 + eb = btrfs_find_tree_block(root, block2, blocksize); 1304 + if (eb && btrfs_buffer_uptodate(eb, gen)) 1305 + block2 = 0; 1306 + free_extent_buffer(eb); 1307 + } 1308 + if (block1 || block2) { 1309 + ret = -EAGAIN; 1310 + btrfs_release_path(root, path); 1311 + if (block1) 1312 + readahead_tree_block(root, block1, blocksize, 0); 1313 + if (block2) 1314 + readahead_tree_block(root, block2, blocksize, 0); 1315 + 1316 + if (block1) { 1317 + eb = read_tree_block(root, block1, blocksize, 0); 1318 + free_extent_buffer(eb); 1319 + } 1320 + if (block1) { 1321 + eb = read_tree_block(root, block2, blocksize, 0); 1322 + free_extent_buffer(eb); 1323 + } 1324 + } 1325 + return ret; 1326 + } 1327 + 1328 + 1329 + /* 1330 * when we walk down the tree, it is usually safe to unlock the higher layers 1331 * in the tree. The exceptions are when our path goes through slot 0, because 1332 * operations on the tree might require changing key pointers higher up in the ··· 1311 btrfs_tree_unlock(t); 1312 path->locks[i] = 0; 1313 } 1314 + } 1315 + } 1316 + 1317 + /* 1318 + * This releases any locks held in the path starting at level and 1319 + * going all the way up to the root. 1320 + * 1321 + * btrfs_search_slot will keep the lock held on higher nodes in a few 1322 + * corner cases, such as COW of the block at slot zero in the node. This 1323 + * ignores those rules, and it should only be called when there are no 1324 + * more updates to be done higher up in the tree. 1325 + */ 1326 + noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) 1327 + { 1328 + int i; 1329 + 1330 + if (path->keep_locks || path->lowest_level) 1331 + return; 1332 + 1333 + for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1334 + if (!path->nodes[i]) 1335 + break; 1336 + if (!path->locks[i]) 1337 + break; 1338 + btrfs_tree_unlock(path->nodes[i]); 1339 + path->locks[i] = 0; 1340 } 1341 } 1342 ··· 1385 */ 1386 if (prealloc_block.objectid && 1387 prealloc_block.offset != b->len) { 1388 + btrfs_set_path_blocking(p); 1389 btrfs_free_reserved_extent(root, 1390 prealloc_block.objectid, 1391 prealloc_block.offset); ··· 1409 goto again; 1410 } 1411 1412 + btrfs_set_path_blocking(p); 1413 + 1414 wret = btrfs_cow_block(trans, root, b, 1415 p->nodes[level + 1], 1416 p->slots[level + 1], ··· 1430 if (!p->skip_locking) 1431 p->locks[level] = 1; 1432 1433 + btrfs_clear_path_blocking(p); 1434 + 1435 + /* 1436 + * we have a lock on b and as long as we aren't changing 1437 + * the tree, there is no way to for the items in b to change. 1438 + * It is safe to drop the lock on our parent before we 1439 + * go through the expensive btree search on b. 
1440 + * 1441 + * If cow is true, then we might be changing slot zero, 1442 + * which may require changing the parent. So, we can't 1443 + * drop the lock until after we know which slot we're 1444 + * operating on. 1445 + */ 1446 + if (!cow) 1447 + btrfs_unlock_up_safe(p, level + 1); 1448 + 1449 ret = check_block(root, p, level); 1450 if (ret) { 1451 ret = -1; ··· 1437 } 1438 1439 ret = bin_search(b, key, level, &slot); 1440 + 1441 if (level != 0) { 1442 if (ret && slot > 0) 1443 slot -= 1; ··· 1444 if ((p->search_for_split || ins_len > 0) && 1445 btrfs_header_nritems(b) >= 1446 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1447 + int sret; 1448 + 1449 + sret = reada_for_balance(root, p, level); 1450 + if (sret) 1451 + goto again; 1452 + 1453 + btrfs_set_path_blocking(p); 1454 + sret = split_node(trans, root, p, level); 1455 + btrfs_clear_path_blocking(p); 1456 + 1457 BUG_ON(sret > 0); 1458 if (sret) { 1459 ret = sret; ··· 1453 b = p->nodes[level]; 1454 slot = p->slots[level]; 1455 } else if (ins_len < 0) { 1456 + int sret; 1457 + 1458 + sret = reada_for_balance(root, p, level); 1459 + if (sret) 1460 + goto again; 1461 + 1462 + btrfs_set_path_blocking(p); 1463 + sret = balance_level(trans, root, p, level); 1464 + btrfs_clear_path_blocking(p); 1465 + 1466 if (sret) { 1467 ret = sret; 1468 goto done; ··· 1488 * of the btree by dropping locks before 1489 * we read. 1490 */ 1491 + if (level > 0) { 1492 btrfs_release_path(NULL, p); 1493 if (tmp) 1494 free_extent_buffer(tmp); ··· 1503 free_extent_buffer(tmp); 1504 goto again; 1505 } else { 1506 + btrfs_set_path_blocking(p); 1507 if (tmp) 1508 free_extent_buffer(tmp); 1509 if (should_reada) ··· 1512 b = read_node_slot(root, b, slot); 1513 } 1514 } 1515 + if (!p->skip_locking) { 1516 + int lret; 1517 + 1518 + btrfs_clear_path_blocking(p); 1519 + lret = btrfs_try_spin_lock(b); 1520 + 1521 + if (!lret) { 1522 + btrfs_set_path_blocking(p); 1523 + btrfs_tree_lock(b); 1524 + btrfs_clear_path_blocking(p); 1525 + } 1526 + } 1527 } else { 1528 p->slots[level] = slot; 1529 if (ins_len > 0 && 1530 btrfs_leaf_free_space(root, b) < ins_len) { 1531 + int sret; 1532 + 1533 + btrfs_set_path_blocking(p); 1534 + sret = split_leaf(trans, root, key, 1535 p, ins_len, ret == 0); 1536 + btrfs_clear_path_blocking(p); 1537 + 1538 BUG_ON(sret > 0); 1539 if (sret) { 1540 ret = sret; ··· 1533 } 1534 ret = 1; 1535 done: 1536 + /* 1537 + * we don't really know what they plan on doing with the path 1538 + * from here on, so for now just mark it as blocking 1539 + */ 1540 + btrfs_set_path_blocking(p); 1541 if (prealloc_block.objectid) { 1542 btrfs_free_reserved_extent(root, 1543 prealloc_block.objectid, 1544 prealloc_block.offset); 1545 } 1546 return ret; 1547 } 1548 ··· 1561 eb = btrfs_lock_root_node(root); 1562 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); 1563 BUG_ON(ret); 1564 + 1565 + btrfs_set_lock_blocking(eb); 1566 1567 parent = eb; 1568 while (1) { ··· 1586 eb = read_tree_block(root, bytenr, blocksize, 1587 generation); 1588 btrfs_tree_lock(eb); 1589 + btrfs_set_lock_blocking(eb); 1590 } 1591 1592 /* ··· 1610 eb = read_tree_block(root, bytenr, blocksize, 1611 generation); 1612 btrfs_tree_lock(eb); 1613 + btrfs_set_lock_blocking(eb); 1614 } 1615 1616 ret = btrfs_cow_block(trans, root, eb, parent, slot, ··· 2156 2157 right = read_node_slot(root, upper, slot + 1); 2158 btrfs_tree_lock(right); 2159 + btrfs_set_lock_blocking(right); 2160 + 2161 free_space = btrfs_leaf_free_space(root, right); 2162 if (free_space < data_size) 2163 goto out_unlock; ··· 2351 2352 left = 
read_node_slot(root, path->nodes[1], slot - 1); 2353 btrfs_tree_lock(left); 2354 + btrfs_set_lock_blocking(left); 2355 + 2356 free_space = btrfs_leaf_free_space(root, left); 2357 if (free_space < data_size) { 2358 ret = 1; ··· 2808 sizeof(struct btrfs_item), 1); 2809 path->keep_locks = 0; 2810 BUG_ON(ret); 2811 + 2812 + /* 2813 + * make sure any changes to the path from split_leaf leave it 2814 + * in a blocking state 2815 + */ 2816 + btrfs_set_path_blocking(path); 2817 2818 leaf = path->nodes[0]; 2819 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); ··· 3338 BUG(); 3339 } 3340 out: 3341 + btrfs_unlock_up_safe(path, 1); 3342 return ret; 3343 } 3344 ··· 3705 */ 3706 if (slot >= nritems) { 3707 path->slots[level] = slot; 3708 + btrfs_set_path_blocking(path); 3709 sret = btrfs_find_next_key(root, path, min_key, level, 3710 cache_only, min_trans); 3711 if (sret == 0) { 3712 btrfs_release_path(root, path); 3713 goto again; 3714 } else { 3715 + btrfs_clear_path_blocking(path); 3716 goto out; 3717 } 3718 } ··· 3722 unlock_up(path, level, 1); 3723 goto out; 3724 } 3725 + btrfs_set_path_blocking(path); 3726 cur = read_node_slot(root, cur, slot); 3727 3728 btrfs_tree_lock(cur); 3729 + 3730 path->locks[level - 1] = 1; 3731 path->nodes[level - 1] = cur; 3732 unlock_up(path, level, 1); 3733 + btrfs_clear_path_blocking(path); 3734 } 3735 out: 3736 if (ret == 0) 3737 memcpy(min_key, &found_key, sizeof(found_key)); 3738 + btrfs_set_path_blocking(path); 3739 return ret; 3740 } 3741 ··· 3827 if (ret < 0) 3828 return ret; 3829 3830 + btrfs_set_path_blocking(path); 3831 nritems = btrfs_header_nritems(path->nodes[0]); 3832 /* 3833 * by releasing the path above we dropped all our locks. A balance ··· 3857 free_extent_buffer(next); 3858 } 3859 3860 + /* the path was set to blocking above */ 3861 if (level == 1 && (path->locks[1] || path->skip_locking) && 3862 path->reada) 3863 reada_for_search(root, path, level, slot, 0); ··· 3865 if (!path->skip_locking) { 3866 WARN_ON(!btrfs_tree_locked(c)); 3867 btrfs_tree_lock(next); 3868 + btrfs_set_lock_blocking(next); 3869 } 3870 break; 3871 } ··· 3881 path->locks[level] = 1; 3882 if (!level) 3883 break; 3884 + 3885 + btrfs_set_path_blocking(path); 3886 if (level == 1 && path->locks[1] && path->reada) 3887 reada_for_search(root, path, level, slot, 0); 3888 next = read_node_slot(root, next, 0); 3889 if (!path->skip_locking) { 3890 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 3891 btrfs_tree_lock(next); 3892 + btrfs_set_lock_blocking(next); 3893 } 3894 } 3895 done: ··· 3911 3912 while (1) { 3913 if (path->slots[0] == 0) { 3914 + btrfs_set_path_blocking(path); 3915 ret = btrfs_prev_leaf(root, path); 3916 if (ret != 0) 3917 return ret;
+4
fs/btrfs/ctree.h
··· 1835 struct btrfs_path *btrfs_alloc_path(void); 1836 void btrfs_free_path(struct btrfs_path *p); 1837 void btrfs_init_path(struct btrfs_path *p); 1838 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1839 struct btrfs_path *path, int slot, int nr); 1840 int btrfs_del_leaf(struct btrfs_trans_handle *trans,
··· 1835 struct btrfs_path *btrfs_alloc_path(void); 1836 void btrfs_free_path(struct btrfs_path *p); 1837 void btrfs_init_path(struct btrfs_path *p); 1838 + void btrfs_set_path_blocking(struct btrfs_path *p); 1839 + void btrfs_clear_path_blocking(struct btrfs_path *p); 1840 + void btrfs_unlock_up_safe(struct btrfs_path *p, int level); 1841 + 1842 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1843 struct btrfs_path *path, int slot, int nr); 1844 int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+8 -2
fs/btrfs/disk-io.c
··· 799 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 800 801 if (ret == 0) 802 - buf->flags |= EXTENT_UPTODATE; 803 else 804 WARN_ON(1); 805 return buf; ··· 813 if (btrfs_header_generation(buf) == 814 root->fs_info->running_transaction->transid) { 815 WARN_ON(!btrfs_tree_locked(buf)); 816 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 817 buf); 818 } ··· 2315 u64 transid = btrfs_header_generation(buf); 2316 struct inode *btree_inode = root->fs_info->btree_inode; 2317 2318 WARN_ON(!btrfs_tree_locked(buf)); 2319 if (transid != root->fs_info->generation) { 2320 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " ··· 2359 int ret; 2360 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 2361 if (ret == 0) 2362 - buf->flags |= EXTENT_UPTODATE; 2363 return ret; 2364 } 2365
··· 799 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 800 801 if (ret == 0) 802 + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 803 else 804 WARN_ON(1); 805 return buf; ··· 813 if (btrfs_header_generation(buf) == 814 root->fs_info->running_transaction->transid) { 815 WARN_ON(!btrfs_tree_locked(buf)); 816 + 817 + /* ugh, clear_extent_buffer_dirty can be expensive */ 818 + btrfs_set_lock_blocking(buf); 819 + 820 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 821 buf); 822 } ··· 2311 u64 transid = btrfs_header_generation(buf); 2312 struct inode *btree_inode = root->fs_info->btree_inode; 2313 2314 + btrfs_set_lock_blocking(buf); 2315 + 2316 WARN_ON(!btrfs_tree_locked(buf)); 2317 if (transid != root->fs_info->generation) { 2318 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " ··· 2353 int ret; 2354 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 2355 if (ret == 0) 2356 + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 2357 return ret; 2358 } 2359
+5
fs/btrfs/extent-tree.c
··· 3407 btrfs_set_header_generation(buf, trans->transid); 3408 btrfs_tree_lock(buf); 3409 clean_tree_block(trans, root, buf); 3410 btrfs_set_buffer_uptodate(buf); 3411 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3412 set_extent_dirty(&root->dirty_log_pages, buf->start, 3413 buf->start + buf->len - 1, GFP_NOFS); ··· 3419 buf->start + buf->len - 1, GFP_NOFS); 3420 } 3421 trans->blocks_used++; 3422 return buf; 3423 } 3424 ··· 3756 3757 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3758 btrfs_tree_lock(next); 3759 3760 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 3761 &refs);
··· 3407 btrfs_set_header_generation(buf, trans->transid); 3408 btrfs_tree_lock(buf); 3409 clean_tree_block(trans, root, buf); 3410 + 3411 + btrfs_set_lock_blocking(buf); 3412 btrfs_set_buffer_uptodate(buf); 3413 + 3414 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3415 set_extent_dirty(&root->dirty_log_pages, buf->start, 3416 buf->start + buf->len - 1, GFP_NOFS); ··· 3416 buf->start + buf->len - 1, GFP_NOFS); 3417 } 3418 trans->blocks_used++; 3419 + /* this returns a buffer locked for blocking */ 3420 return buf; 3421 } 3422 ··· 3752 3753 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3754 btrfs_tree_lock(next); 3755 + btrfs_set_lock_blocking(next); 3756 3757 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 3758 &refs);
+9 -9
fs/btrfs/extent_io.c
··· 2990 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2991 eb->start = start; 2992 eb->len = len; 2993 - mutex_init(&eb->mutex); 2994 #if LEAK_DEBUG 2995 spin_lock_irqsave(&leak_lock, flags); 2996 list_add(&eb->leak_list, &buffers); ··· 3073 unlock_page(p); 3074 } 3075 if (uptodate) 3076 - eb->flags |= EXTENT_UPTODATE; 3077 - eb->flags |= EXTENT_BUFFER_FILLED; 3078 3079 spin_lock(&tree->buffer_lock); 3080 exists = buffer_tree_insert(tree, start, &eb->rb_node); ··· 3227 unsigned long num_pages; 3228 3229 num_pages = num_extent_pages(eb->start, eb->len); 3230 - eb->flags &= ~EXTENT_UPTODATE; 3231 3232 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3233 GFP_NOFS); ··· 3298 struct page *page; 3299 int pg_uptodate = 1; 3300 3301 - if (eb->flags & EXTENT_UPTODATE) 3302 return 1; 3303 3304 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3334 struct bio *bio = NULL; 3335 unsigned long bio_flags = 0; 3336 3337 - if (eb->flags & EXTENT_UPTODATE) 3338 return 0; 3339 3340 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3365 } 3366 if (all_uptodate) { 3367 if (start_i == 0) 3368 - eb->flags |= EXTENT_UPTODATE; 3369 goto unlock_exit; 3370 } 3371 ··· 3401 } 3402 3403 if (!ret) 3404 - eb->flags |= EXTENT_UPTODATE; 3405 return ret; 3406 3407 unlock_exit: ··· 3498 unmap_extent_buffer(eb, eb->map_token, km); 3499 eb->map_token = NULL; 3500 save = 1; 3501 - WARN_ON(!mutex_is_locked(&eb->mutex)); 3502 } 3503 err = map_private_extent_buffer(eb, start, min_len, token, map, 3504 map_start, map_len, km);
··· 2990 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2991 eb->start = start; 2992 eb->len = len; 2993 + spin_lock_init(&eb->lock); 2994 + init_waitqueue_head(&eb->lock_wq); 2995 + 2996 #if LEAK_DEBUG 2997 spin_lock_irqsave(&leak_lock, flags); 2998 list_add(&eb->leak_list, &buffers); ··· 3071 unlock_page(p); 3072 } 3073 if (uptodate) 3074 + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3075 3076 spin_lock(&tree->buffer_lock); 3077 exists = buffer_tree_insert(tree, start, &eb->rb_node); ··· 3226 unsigned long num_pages; 3227 3228 num_pages = num_extent_pages(eb->start, eb->len); 3229 + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3230 3231 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3232 GFP_NOFS); ··· 3297 struct page *page; 3298 int pg_uptodate = 1; 3299 3300 + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3301 return 1; 3302 3303 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3333 struct bio *bio = NULL; 3334 unsigned long bio_flags = 0; 3335 3336 + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3337 return 0; 3338 3339 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, ··· 3364 } 3365 if (all_uptodate) { 3366 if (start_i == 0) 3367 + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3368 goto unlock_exit; 3369 } 3370 ··· 3400 } 3401 3402 if (!ret) 3403 + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3404 return ret; 3405 3406 unlock_exit: ··· 3497 unmap_extent_buffer(eb, eb->map_token, km); 3498 eb->map_token = NULL; 3499 save = 1; 3500 } 3501 err = map_private_extent_buffer(eb, start, min_len, token, map, 3502 map_start, map_len, km);
+14 -2
fs/btrfs/extent_io.h
··· 22 /* flags for bio submission */ 23 #define EXTENT_BIO_COMPRESSED 1 24 25 /* 26 * page->private values. Every page that is controlled by the extent 27 * map has page->private set to one. ··· 99 unsigned long map_start; 100 unsigned long map_len; 101 struct page *first_page; 102 atomic_t refs; 103 - int flags; 104 struct list_head leak_list; 105 struct rb_node rb_node; 106 - struct mutex mutex; 107 }; 108 109 struct extent_map_tree;
··· 22 /* flags for bio submission */ 23 #define EXTENT_BIO_COMPRESSED 1 24 25 + /* these are bit numbers for test/set bit */ 26 + #define EXTENT_BUFFER_UPTODATE 0 27 + #define EXTENT_BUFFER_BLOCKING 1 28 + 29 /* 30 * page->private values. Every page that is controlled by the extent 31 * map has page->private set to one. ··· 95 unsigned long map_start; 96 unsigned long map_len; 97 struct page *first_page; 98 + unsigned long bflags; 99 atomic_t refs; 100 struct list_head leak_list; 101 struct rb_node rb_node; 102 + 103 + /* the spinlock is used to protect most operations */ 104 + spinlock_t lock; 105 + 106 + /* 107 + * when we keep the lock held while blocking, waiters go onto 108 + * the wq 109 + */ 110 + wait_queue_head_t lock_wq; 111 }; 112 113 struct extent_map_tree;
+3
fs/btrfs/inode.c
··· 50 #include "tree-log.h" 51 #include "ref-cache.h" 52 #include "compression.h" 53 54 struct btrfs_iget_args { 55 u64 ino; ··· 2022 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2023 2024 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2025 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2026 alloc_group_block, 0); 2027 btrfs_free_path(path); ··· 2119 goto failed; 2120 } 2121 2122 leaf = path->nodes[0]; 2123 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2124 struct btrfs_inode_item);
··· 50 #include "tree-log.h" 51 #include "ref-cache.h" 52 #include "compression.h" 53 + #include "locking.h" 54 55 struct btrfs_iget_args { 56 u64 ino; ··· 2021 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2022 2023 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2024 + 2025 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2026 alloc_group_block, 0); 2027 btrfs_free_path(path); ··· 2117 goto failed; 2118 } 2119 2120 + btrfs_unlock_up_safe(path, 1); 2121 leaf = path->nodes[0]; 2122 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2123 struct btrfs_inode_item);
+191 -19
fs/btrfs/locking.c
··· 26 #include "locking.h" 27 28 /* 29 - * locks the per buffer mutex in an extent buffer. This uses adaptive locks 30 - * and the spin is not tuned very extensively. The spinning does make a big 31 - * difference in almost every workload, but spinning for the right amount of 32 - * time needs some help. 33 - * 34 - * In general, we want to spin as long as the lock holder is doing btree 35 - * searches, and we should give up if they are in more expensive code. 36 */ 37 38 - int btrfs_tree_lock(struct extent_buffer *eb) 39 { 40 int i; 41 - 42 - if (mutex_trylock(&eb->mutex)) 43 - return 0; 44 for (i = 0; i < 512; i++) { 45 cpu_relax(); 46 - if (mutex_trylock(&eb->mutex)) 47 - return 0; 48 } 49 - cpu_relax(); 50 - mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); 51 return 0; 52 } 53 54 int btrfs_try_tree_lock(struct extent_buffer *eb) 55 { 56 - return mutex_trylock(&eb->mutex); 57 } 58 59 int btrfs_tree_unlock(struct extent_buffer *eb) 60 { 61 - mutex_unlock(&eb->mutex); 62 return 0; 63 } 64 65 int btrfs_tree_locked(struct extent_buffer *eb) 66 { 67 - return mutex_is_locked(&eb->mutex); 68 } 69 70 /* ··· 245 { 246 int i; 247 struct extent_buffer *eb; 248 for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { 249 eb = path->nodes[i]; 250 if (!eb) 251 break; 252 smp_mb(); 253 - if (!list_empty(&eb->mutex.wait_list)) 254 return 1; 255 } 256 return 0;
··· 26 #include "locking.h" 27 28 /* 29 + * btrfs_header_level() isn't free, so don't call it when lockdep isn't 30 + * on 31 */ 32 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 33 + static inline void spin_nested(struct extent_buffer *eb) 34 + { 35 + spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); 36 + } 37 + #else 38 + static inline void spin_nested(struct extent_buffer *eb) 39 + { 40 + spin_lock(&eb->lock); 41 + } 42 + #endif 43 44 + /* 45 + * Setting a lock to blocking will drop the spinlock and set the 46 + * flag that forces other procs who want the lock to wait. After 47 + * this you can safely schedule with the lock held. 48 + */ 49 + void btrfs_set_lock_blocking(struct extent_buffer *eb) 50 + { 51 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 52 + set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); 53 + spin_unlock(&eb->lock); 54 + } 55 + /* exit with the spin lock released and the bit set */ 56 + } 57 + 58 + /* 59 + * clearing the blocking flag will take the spinlock again. 60 + * After this you can't safely schedule 61 + */ 62 + void btrfs_clear_lock_blocking(struct extent_buffer *eb) 63 + { 64 + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 65 + spin_nested(eb); 66 + clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); 67 + smp_mb__after_clear_bit(); 68 + } 69 + /* exit with the spin lock held */ 70 + } 71 + 72 + /* 73 + * unfortunately, many of the places that currently set a lock to blocking 74 + * don't end up blocking for every long, and often they don't block 75 + * at all. For a dbench 50 run, if we don't spin one the blocking bit 76 + * at all, the context switch rate can jump up to 400,000/sec or more. 77 + * 78 + * So, we're still stuck with this crummy spin on the blocking bit, 79 + * at least until the most common causes of the short blocks 80 + * can be dealt with. 81 + */ 82 + static int btrfs_spin_on_block(struct extent_buffer *eb) 83 { 84 int i; 85 for (i = 0; i < 512; i++) { 86 cpu_relax(); 87 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 88 + return 1; 89 + if (need_resched()) 90 + break; 91 } 92 return 0; 93 } 94 95 + /* 96 + * This is somewhat different from trylock. It will take the 97 + * spinlock but if it finds the lock is set to blocking, it will 98 + * return without the lock held. 99 + * 100 + * returns 1 if it was able to take the lock and zero otherwise 101 + * 102 + * After this call, scheduling is not safe without first calling 103 + * btrfs_set_lock_blocking() 104 + */ 105 + int btrfs_try_spin_lock(struct extent_buffer *eb) 106 + { 107 + int i; 108 + 109 + spin_nested(eb); 110 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 111 + return 1; 112 + spin_unlock(&eb->lock); 113 + 114 + /* spin for a bit on the BLOCKING flag */ 115 + for (i = 0; i < 2; i++) { 116 + if (!btrfs_spin_on_block(eb)) 117 + break; 118 + 119 + spin_nested(eb); 120 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 121 + return 1; 122 + spin_unlock(&eb->lock); 123 + } 124 + return 0; 125 + } 126 + 127 + /* 128 + * the autoremove wake function will return 0 if it tried to wake up 129 + * a process that was already awake, which means that process won't 130 + * count as an exclusive wakeup. The waitq code will continue waking 131 + * procs until it finds one that was actually sleeping. 132 + * 133 + * For btrfs, this isn't quite what we want. We want a single proc 134 + * to be notified that the lock is ready for taking. If that proc 135 + * already happen to be awake, great, it will loop around and try for 136 + * the lock. 
137 + * 138 + * So, btrfs_wake_function always returns 1, even when the proc that we 139 + * tried to wake up was already awake. 140 + */ 141 + static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, 142 + int sync, void *key) 143 + { 144 + autoremove_wake_function(wait, mode, sync, key); 145 + return 1; 146 + } 147 + 148 + /* 149 + * returns with the extent buffer spinlocked. 150 + * 151 + * This will spin and/or wait as required to take the lock, and then 152 + * return with the spinlock held. 153 + * 154 + * After this call, scheduling is not safe without first calling 155 + * btrfs_set_lock_blocking() 156 + */ 157 + int btrfs_tree_lock(struct extent_buffer *eb) 158 + { 159 + DEFINE_WAIT(wait); 160 + wait.func = btrfs_wake_function; 161 + 162 + while(1) { 163 + spin_nested(eb); 164 + 165 + /* nobody is blocking, exit with the spinlock held */ 166 + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 167 + return 0; 168 + 169 + /* 170 + * we have the spinlock, but the real owner is blocking. 171 + * wait for them 172 + */ 173 + spin_unlock(&eb->lock); 174 + 175 + /* 176 + * spin for a bit, and if the blocking flag goes away, 177 + * loop around 178 + */ 179 + if (btrfs_spin_on_block(eb)) 180 + continue; 181 + 182 + prepare_to_wait_exclusive(&eb->lock_wq, &wait, 183 + TASK_UNINTERRUPTIBLE); 184 + 185 + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 186 + schedule(); 187 + 188 + finish_wait(&eb->lock_wq, &wait); 189 + } 190 + return 0; 191 + } 192 + 193 + /* 194 + * Very quick trylock, this does not spin or schedule. It returns 195 + * 1 with the spinlock held if it was able to take the lock, or it 196 + * returns zero if it was unable to take the lock. 197 + * 198 + * After this call, scheduling is not safe without first calling 199 + * btrfs_set_lock_blocking() 200 + */ 201 int btrfs_try_tree_lock(struct extent_buffer *eb) 202 { 203 + if (spin_trylock(&eb->lock)) { 204 + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { 205 + /* 206 + * we've got the spinlock, but the real owner is 207 + * blocking. Drop the spinlock and return failure 208 + */ 209 + spin_unlock(&eb->lock); 210 + return 0; 211 + } 212 + return 1; 213 + } 214 + /* someone else has the spinlock giveup */ 215 + return 0; 216 } 217 218 int btrfs_tree_unlock(struct extent_buffer *eb) 219 { 220 + /* 221 + * if we were a blocking owner, we don't have the spinlock held 222 + * just clear the bit and look for waiters 223 + */ 224 + if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) 225 + smp_mb__after_clear_bit(); 226 + else 227 + spin_unlock(&eb->lock); 228 + 229 + if (waitqueue_active(&eb->lock_wq)) 230 + wake_up(&eb->lock_wq); 231 return 0; 232 } 233 234 int btrfs_tree_locked(struct extent_buffer *eb) 235 { 236 + return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || 237 + spin_is_locked(&eb->lock); 238 } 239 240 /* ··· 75 { 76 int i; 77 struct extent_buffer *eb; 78 + 79 for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { 80 eb = path->nodes[i]; 81 if (!eb) 82 break; 83 smp_mb(); 84 + if (spin_is_contended(&eb->lock) || 85 + waitqueue_active(&eb->lock_wq)) 86 return 1; 87 } 88 return 0;
+6
fs/btrfs/locking.h
··· 22 int btrfs_tree_lock(struct extent_buffer *eb); 23 int btrfs_tree_unlock(struct extent_buffer *eb); 24 int btrfs_tree_locked(struct extent_buffer *eb); 25 int btrfs_try_tree_lock(struct extent_buffer *eb); 26 int btrfs_path_lock_waiting(struct btrfs_path *path, int level); 27 #endif
··· 22 int btrfs_tree_lock(struct extent_buffer *eb); 23 int btrfs_tree_unlock(struct extent_buffer *eb); 24 int btrfs_tree_locked(struct extent_buffer *eb); 25 + 26 int btrfs_try_tree_lock(struct extent_buffer *eb); 27 + int btrfs_try_spin_lock(struct extent_buffer *eb); 28 + 29 int btrfs_path_lock_waiting(struct btrfs_path *path, int level); 30 + 31 + void btrfs_set_lock_blocking(struct extent_buffer *eb); 32 + void btrfs_clear_lock_blocking(struct extent_buffer *eb); 33 #endif
+1
fs/btrfs/tree-defrag.c
··· 74 u32 nritems; 75 76 root_node = btrfs_lock_root_node(root); 77 nritems = btrfs_header_nritems(root_node); 78 root->defrag_max.objectid = 0; 79 /* from above we know this is not a leaf */
··· 74 u32 nritems; 75 76 root_node = btrfs_lock_root_node(root); 77 + btrfs_set_lock_blocking(root_node); 78 nritems = btrfs_header_nritems(root_node); 79 root->defrag_max.objectid = 0; 80 /* from above we know this is not a leaf */
+4
fs/btrfs/tree-log.c
··· 1615 1616 btrfs_tree_lock(next); 1617 clean_tree_block(trans, root, next); 1618 btrfs_wait_tree_block_writeback(next); 1619 btrfs_tree_unlock(next); 1620 ··· 1662 next = path->nodes[*level]; 1663 btrfs_tree_lock(next); 1664 clean_tree_block(trans, root, next); 1665 btrfs_wait_tree_block_writeback(next); 1666 btrfs_tree_unlock(next); 1667 ··· 1720 1721 btrfs_tree_lock(next); 1722 clean_tree_block(trans, root, next); 1723 btrfs_wait_tree_block_writeback(next); 1724 btrfs_tree_unlock(next); 1725 ··· 1793 1794 btrfs_tree_lock(next); 1795 clean_tree_block(trans, log, next); 1796 btrfs_wait_tree_block_writeback(next); 1797 btrfs_tree_unlock(next); 1798
··· 1615 1616 btrfs_tree_lock(next); 1617 clean_tree_block(trans, root, next); 1618 + btrfs_set_lock_blocking(next); 1619 btrfs_wait_tree_block_writeback(next); 1620 btrfs_tree_unlock(next); 1621 ··· 1661 next = path->nodes[*level]; 1662 btrfs_tree_lock(next); 1663 clean_tree_block(trans, root, next); 1664 + btrfs_set_lock_blocking(next); 1665 btrfs_wait_tree_block_writeback(next); 1666 btrfs_tree_unlock(next); 1667 ··· 1718 1719 btrfs_tree_lock(next); 1720 clean_tree_block(trans, root, next); 1721 + btrfs_set_lock_blocking(next); 1722 btrfs_wait_tree_block_writeback(next); 1723 btrfs_tree_unlock(next); 1724 ··· 1790 1791 btrfs_tree_lock(next); 1792 clean_tree_block(trans, log, next); 1793 + btrfs_set_lock_blocking(next); 1794 btrfs_wait_tree_block_writeback(next); 1795 btrfs_tree_unlock(next); 1796