btrfs: ensure releasing squota reserve on head refs

A reservation goes through a 3 step lifetime:

- generated during delalloc
- released/counted by ordered_extent allocation
- freed by running delayed ref

That third step depends on must_insert_reserved on the head ref, so the
head ref with that field set owns the reservation. Once you prepare to
run the head ref, must_insert_reserved is unset, which means that
running the ref must free the reservation, whether or not it succeeds,
or else the reservation is leaked. That results in either a risk of
spurious ENOSPC if the fs stays writable or a warning on unmount if it
is readonly.

The existing squota code was aware of these invariants, but missed a few
cases. Improve it by adding a helper function to use in the cleanup
paths and call it from the existing early returns in running delayed
refs. This also simplifies btrfs_record_squota_delta and struct
btrfs_squota_delta.

This fixes (or at least improves the reliability of) generic/475 with
"mkfs -O squota". On my machine, that test failed ~4/10 times without
this patch and passed 100/100 times with it.

Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by Boris Burkov and committed by David Sterba e85a0ada a8680550

+46 -19
+34 -14
fs/btrfs/extent-tree.c
··· 1547 return ret; 1548 } 1549 1550 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1551 struct btrfs_delayed_ref_head *href, 1552 struct btrfs_delayed_ref_node *node, ··· 1586 struct btrfs_squota_delta delta = { 1587 .root = href->owning_root, 1588 .num_bytes = node->num_bytes, 1589 - .rsv_bytes = href->reserved_bytes, 1590 .is_data = true, 1591 .is_inc = true, 1592 .generation = trans->transid, ··· 1602 flags, ref->objectid, 1603 ref->offset, &key, 1604 node->ref_mod, href->owning_root); 1605 if (!ret) 1606 ret = btrfs_record_squota_delta(trans->fs_info, &delta); 1607 - else 1608 - btrfs_qgroup_free_refroot(trans->fs_info, delta.root, 1609 - delta.rsv_bytes, BTRFS_QGROUP_RSV_DATA); 1610 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1611 ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root, 1612 ref->objectid, ref->offset, ··· 1756 struct btrfs_squota_delta delta = { 1757 .root = href->owning_root, 1758 .num_bytes = fs_info->nodesize, 1759 - .rsv_bytes = 0, 1760 .is_data = false, 1761 .is_inc = true, 1762 .generation = trans->transid, ··· 1787 int ret = 0; 1788 1789 if (TRANS_ABORTED(trans)) { 1790 - if (insert_reserved) 1791 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); 1792 return 0; 1793 } 1794 ··· 1886 struct btrfs_delayed_ref_root *delayed_refs, 1887 struct btrfs_delayed_ref_head *head) 1888 { 1889 /* 1890 * We had csum deletions accounted for in our delayed refs rsv, we need 1891 * to drop the csum leaves for this update from our delayed_refs_rsv. 
··· 1902 1903 btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums); 1904 1905 - return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); 1906 } 1907 - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && 1908 - head->must_insert_reserved && head->is_data) 1909 - btrfs_qgroup_free_refroot(fs_info, head->owning_root, 1910 - head->reserved_bytes, BTRFS_QGROUP_RSV_DATA); 1911 1912 - return 0; 1913 } 1914 1915 static int cleanup_ref_head(struct btrfs_trans_handle *trans, ··· 2049 * spin lock. 2050 */ 2051 must_insert_reserved = locked_ref->must_insert_reserved; 2052 locked_ref->must_insert_reserved = false; 2053 2054 extent_op = locked_ref->extent_op; ··· 3314 struct btrfs_squota_delta delta = { 3315 .root = delayed_ref_root, 3316 .num_bytes = num_bytes, 3317 - .rsv_bytes = 0, 3318 .is_data = is_data, 3319 .is_inc = false, 3320 .generation = btrfs_extent_generation(leaf, ei), ··· 4958 .root = root_objectid, 4959 .num_bytes = ins->offset, 4960 .generation = trans->transid, 4961 - .rsv_bytes = 0, 4962 .is_data = true, 4963 .is_inc = true, 4964 };
··· 1547 return ret; 1548 } 1549 1550 + static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, 1551 + struct btrfs_delayed_ref_head *href) 1552 + { 1553 + u64 root = href->owning_root; 1554 + 1555 + /* 1556 + * Don't check must_insert_reserved, as this is called from contexts 1557 + * where it has already been unset. 1558 + */ 1559 + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE || 1560 + !href->is_data || !is_fstree(root)) 1561 + return; 1562 + 1563 + btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes, 1564 + BTRFS_QGROUP_RSV_DATA); 1565 + } 1566 + 1567 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1568 struct btrfs_delayed_ref_head *href, 1569 struct btrfs_delayed_ref_node *node, ··· 1569 struct btrfs_squota_delta delta = { 1570 .root = href->owning_root, 1571 .num_bytes = node->num_bytes, 1572 .is_data = true, 1573 .is_inc = true, 1574 .generation = trans->transid, ··· 1586 flags, ref->objectid, 1587 ref->offset, &key, 1588 node->ref_mod, href->owning_root); 1589 + free_head_ref_squota_rsv(trans->fs_info, href); 1590 if (!ret) 1591 ret = btrfs_record_squota_delta(trans->fs_info, &delta); 1592 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1593 ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root, 1594 ref->objectid, ref->offset, ··· 1742 struct btrfs_squota_delta delta = { 1743 .root = href->owning_root, 1744 .num_bytes = fs_info->nodesize, 1745 .is_data = false, 1746 .is_inc = true, 1747 .generation = trans->transid, ··· 1774 int ret = 0; 1775 1776 if (TRANS_ABORTED(trans)) { 1777 + if (insert_reserved) { 1778 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); 1779 + free_head_ref_squota_rsv(trans->fs_info, href); 1780 + } 1781 return 0; 1782 } 1783 ··· 1871 struct btrfs_delayed_ref_root *delayed_refs, 1872 struct btrfs_delayed_ref_head *head) 1873 { 1874 + u64 ret = 0; 1875 + 1876 /* 1877 * We had csum deletions accounted for in our delayed refs rsv, we need 1878 * to drop the csum 
leaves for this update from our delayed_refs_rsv. ··· 1885 1886 btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums); 1887 1888 + ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); 1889 } 1890 + /* must_insert_reserved can be set only if we didn't run the head ref. */ 1891 + if (head->must_insert_reserved) 1892 + free_head_ref_squota_rsv(fs_info, head); 1893 1894 + return ret; 1895 } 1896 1897 static int cleanup_ref_head(struct btrfs_trans_handle *trans, ··· 2033 * spin lock. 2034 */ 2035 must_insert_reserved = locked_ref->must_insert_reserved; 2036 + /* 2037 + * Unsetting this on the head ref relinquishes ownership of 2038 + * the rsv_bytes, so it is critical that every possible code 2039 + * path from here forward frees all reserves including qgroup 2040 + * reserve. 2041 + */ 2042 locked_ref->must_insert_reserved = false; 2043 2044 extent_op = locked_ref->extent_op; ··· 3292 struct btrfs_squota_delta delta = { 3293 .root = delayed_ref_root, 3294 .num_bytes = num_bytes, 3295 .is_data = is_data, 3296 .is_inc = false, 3297 .generation = btrfs_extent_generation(leaf, ei), ··· 4937 .root = root_objectid, 4938 .num_bytes = ins->offset, 4939 .generation = trans->transid, 4940 .is_data = true, 4941 .is_inc = true, 4942 };
+11 -3
fs/btrfs/qgroup.c
··· 4661 *root = RB_ROOT; 4662 } 4663 4664 int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, 4665 struct btrfs_squota_delta *delta) 4666 { ··· 4716 4717 out: 4718 spin_unlock(&fs_info->qgroup_lock); 4719 - if (!ret && delta->rsv_bytes) 4720 - btrfs_qgroup_free_refroot(fs_info, root, delta->rsv_bytes, 4721 - BTRFS_QGROUP_RSV_DATA); 4722 return ret; 4723 }
··· 4661 *root = RB_ROOT; 4662 } 4663 4664 + void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes) 4665 + { 4666 + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) 4667 + return; 4668 + 4669 + if (!is_fstree(root)) 4670 + return; 4671 + 4672 + btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes, BTRFS_QGROUP_RSV_DATA); 4673 + } 4674 + 4675 int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, 4676 struct btrfs_squota_delta *delta) 4677 { ··· 4705 4706 out: 4707 spin_unlock(&fs_info->qgroup_lock); 4708 return ret; 4709 }
+1 -2
fs/btrfs/qgroup.h
··· 274 u64 root; 275 /* The number of bytes in the extent being counted. */ 276 u64 num_bytes; 277 - /* The number of bytes reserved for this extent. */ 278 - u64 rsv_bytes; 279 /* The generation the extent was created in. */ 280 u64 generation; 281 /* Whether we are using or freeing the extent. */ ··· 420 struct btrfs_root *root, struct extent_buffer *eb); 421 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); 422 bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); 423 int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, 424 struct btrfs_squota_delta *delta); 425
··· 274 u64 root; 275 /* The number of bytes in the extent being counted. */ 276 u64 num_bytes; 277 /* The generation the extent was created in. */ 278 u64 generation; 279 /* Whether we are using or freeing the extent. */ ··· 422 struct btrfs_root *root, struct extent_buffer *eb); 423 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); 424 bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); 425 + void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes); 426 int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, 427 struct btrfs_squota_delta *delta); 428