Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: rework qgroup accounting

Currently qgroups account for space by intercepting delayed ref updates to fs
trees. It does this by adding sequence numbers to delayed ref updates so that
it can figure out how the tree looked before the update so we can adjust the
counters properly. The problem with this is that it does not allow delayed refs
to be merged, so if, say, you are defragging an extent with 5k snapshots pointing
to it we will thrash the delayed ref lock because we need to go back and
manually merge these things together. Instead we want to process quota changes
when we know they are going to happen, like when we first allocate an extent, we
free a reference for an extent, we add new references etc. This patch
accomplishes this by only adding qgroup operations for real ref changes. We
only modify the sequence number when we need to look up roots for bytenrs, this
reduces the amount of churn on the sequence number and allows us to merge
delayed refs as we add them most of the time. This patch encompasses a bunch of
architectural changes

1) qgroup ref operations: instead of tracking qgroup operations through the
delayed refs, we simply add new ref operations whenever we notice that we need to,
i.e. when we've modified the refs themselves.

2) tree mod seq: we no longer have this separation of major/minor counters.
This makes the sequence number stuff much more sane and we can remove some
locking that was needed to protect the counter.

3) delayed ref seq: we now read the tree mod seq number and use that as our
sequence. This means each new delayed ref doesn't have its own unique sequence
number; rather, whenever we go to look up backrefs we inc the sequence number so
we can make sure to keep any new operations from screwing up our world view at
that given point. This allows us to merge delayed refs during runtime.

With all of these changes the delayed ref stuff is a little saner and the qgroup
accounting stuff no longer goes negative in some cases, as it did before.
Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>

authored by

Josef Bacik and committed by
Chris Mason
fcebe456 5dca6eea

+1055 -490
+2 -2
fs/btrfs/backref.h
··· 55 55 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); 56 56 57 57 int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 58 - struct btrfs_fs_info *fs_info, u64 bytenr, 59 - u64 time_seq, struct ulist **roots); 58 + struct btrfs_fs_info *fs_info, u64 bytenr, 59 + u64 time_seq, struct ulist **roots); 60 60 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 61 61 u32 name_len, unsigned long name_off, 62 62 struct extent_buffer *eb_in, u64 parent,
+5 -40
fs/btrfs/ctree.c
··· 356 356 } 357 357 358 358 /* 359 - * Increment the upper half of tree_mod_seq, set lower half zero. 360 - * 361 - * Must be called with fs_info->tree_mod_seq_lock held. 359 + * Pull a new tree mod seq number for our operation. 362 360 */ 363 - static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info) 364 - { 365 - u64 seq = atomic64_read(&fs_info->tree_mod_seq); 366 - seq &= 0xffffffff00000000ull; 367 - seq += 1ull << 32; 368 - atomic64_set(&fs_info->tree_mod_seq, seq); 369 - return seq; 370 - } 371 - 372 - /* 373 - * Increment the lower half of tree_mod_seq. 374 - * 375 - * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers 376 - * are generated should not technically require a spin lock here. (Rationale: 377 - * incrementing the minor while incrementing the major seq number is between its 378 - * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it 379 - * just returns a unique sequence number as usual.) We have decided to leave 380 - * that requirement in here and rethink it once we notice it really imposes a 381 - * problem on some workload. 
382 - */ 383 - static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) 361 + static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) 384 362 { 385 363 return atomic64_inc_return(&fs_info->tree_mod_seq); 386 - } 387 - 388 - /* 389 - * return the last minor in the previous major tree_mod_seq number 390 - */ 391 - u64 btrfs_tree_mod_seq_prev(u64 seq) 392 - { 393 - return (seq & 0xffffffff00000000ull) - 1ull; 394 364 } 395 365 396 366 /* ··· 374 404 u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, 375 405 struct seq_list *elem) 376 406 { 377 - u64 seq; 378 - 379 407 tree_mod_log_write_lock(fs_info); 380 408 spin_lock(&fs_info->tree_mod_seq_lock); 381 409 if (!elem->seq) { 382 - elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); 410 + elem->seq = btrfs_inc_tree_mod_seq(fs_info); 383 411 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 384 412 } 385 - seq = btrfs_inc_tree_mod_seq_minor(fs_info); 386 413 spin_unlock(&fs_info->tree_mod_seq_lock); 387 414 tree_mod_log_write_unlock(fs_info); 388 415 389 - return seq; 416 + return elem->seq; 390 417 } 391 418 392 419 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, ··· 456 489 457 490 BUG_ON(!tm); 458 491 459 - spin_lock(&fs_info->tree_mod_seq_lock); 460 - tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); 461 - spin_unlock(&fs_info->tree_mod_seq_lock); 492 + tm->seq = btrfs_inc_tree_mod_seq(fs_info); 462 493 463 494 tm_root = &fs_info->tree_mod_log; 464 495 new = &tm_root->rb_node;
+7 -52
fs/btrfs/ctree.h
··· 1648 1648 1649 1649 /* holds configuration and tracking. Protected by qgroup_lock */ 1650 1650 struct rb_root qgroup_tree; 1651 + struct rb_root qgroup_op_tree; 1651 1652 spinlock_t qgroup_lock; 1653 + spinlock_t qgroup_op_lock; 1654 + atomic_t qgroup_op_seq; 1652 1655 1653 1656 /* 1654 1657 * used to avoid frequently calling ulist_alloc()/ulist_free() ··· 3303 3300 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3304 3301 struct btrfs_key *ins, int is_data); 3305 3302 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3306 - struct extent_buffer *buf, int full_backref, int for_cow); 3303 + struct extent_buffer *buf, int full_backref, int no_quota); 3307 3304 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3308 - struct extent_buffer *buf, int full_backref, int for_cow); 3305 + struct extent_buffer *buf, int full_backref, int no_quota); 3309 3306 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3310 3307 struct btrfs_root *root, 3311 3308 u64 bytenr, u64 num_bytes, u64 flags, ··· 3313 3310 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3314 3311 struct btrfs_root *root, 3315 3312 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3316 - u64 owner, u64 offset, int for_cow); 3313 + u64 owner, u64 offset, int no_quota); 3317 3314 3318 3315 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 3319 3316 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, ··· 3325 3322 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 3326 3323 struct btrfs_root *root, 3327 3324 u64 bytenr, u64 num_bytes, u64 parent, 3328 - u64 root_objectid, u64 owner, u64 offset, int for_cow); 3325 + u64 root_objectid, u64 owner, u64 offset, int no_quota); 3329 3326 3330 3327 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3331 3328 struct btrfs_root *root); ··· 3413 3410 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle 
*trans, 3414 3411 struct btrfs_fs_info *fs_info); 3415 3412 int __get_raid_index(u64 flags); 3416 - 3417 3413 int btrfs_start_nocow_write(struct btrfs_root *root); 3418 3414 void btrfs_end_nocow_write(struct btrfs_root *root); 3419 3415 /* ctree.c */ ··· 3588 3586 struct seq_list *elem); 3589 3587 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 3590 3588 struct seq_list *elem); 3591 - u64 btrfs_tree_mod_seq_prev(u64 seq); 3592 3589 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 3593 3590 3594 3591 /* root-item.c */ ··· 4094 4093 void btrfs_reada_detach(void *handle); 4095 4094 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 4096 4095 u64 start, int err); 4097 - 4098 - /* qgroup.c */ 4099 - struct qgroup_update { 4100 - struct list_head list; 4101 - struct btrfs_delayed_ref_node *node; 4102 - struct btrfs_delayed_extent_op *extent_op; 4103 - }; 4104 - 4105 - int btrfs_quota_enable(struct btrfs_trans_handle *trans, 4106 - struct btrfs_fs_info *fs_info); 4107 - int btrfs_quota_disable(struct btrfs_trans_handle *trans, 4108 - struct btrfs_fs_info *fs_info); 4109 - int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); 4110 - void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); 4111 - int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); 4112 - int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 4113 - struct btrfs_fs_info *fs_info, u64 src, u64 dst); 4114 - int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 4115 - struct btrfs_fs_info *fs_info, u64 src, u64 dst); 4116 - int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 4117 - struct btrfs_fs_info *fs_info, u64 qgroupid, 4118 - char *name); 4119 - int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 4120 - struct btrfs_fs_info *fs_info, u64 qgroupid); 4121 - int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 4122 - struct btrfs_fs_info *fs_info, u64 qgroupid, 4123 - struct btrfs_qgroup_limit 
*limit); 4124 - int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); 4125 - void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); 4126 - struct btrfs_delayed_extent_op; 4127 - int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 4128 - struct btrfs_delayed_ref_node *node, 4129 - struct btrfs_delayed_extent_op *extent_op); 4130 - int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, 4131 - struct btrfs_fs_info *fs_info, 4132 - struct btrfs_delayed_ref_node *node, 4133 - struct btrfs_delayed_extent_op *extent_op); 4134 - int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 4135 - struct btrfs_fs_info *fs_info); 4136 - int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 4137 - struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 4138 - struct btrfs_qgroup_inherit *inherit); 4139 - int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); 4140 - void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); 4141 - 4142 - void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); 4143 4096 4144 4097 static inline int is_fstree(u64 rootid) 4145 4098 {
+23 -16
fs/btrfs/delayed-ref.c
··· 106 106 return -1; 107 107 if (ref1->type > ref2->type) 108 108 return 1; 109 + if (ref1->no_quota > ref2->no_quota) 110 + return 1; 111 + if (ref1->no_quota < ref2->no_quota) 112 + return -1; 109 113 /* merging of sequenced refs is not allowed */ 110 114 if (compare_seq) { 111 115 if (ref1->seq < ref2->seq) ··· 639 635 struct btrfs_delayed_ref_head *head_ref, 640 636 struct btrfs_delayed_ref_node *ref, u64 bytenr, 641 637 u64 num_bytes, u64 parent, u64 ref_root, int level, 642 - int action, int for_cow) 638 + int action, int no_quota) 643 639 { 644 640 struct btrfs_delayed_ref_node *existing; 645 641 struct btrfs_delayed_tree_ref *full_ref; ··· 649 645 if (action == BTRFS_ADD_DELAYED_EXTENT) 650 646 action = BTRFS_ADD_DELAYED_REF; 651 647 648 + if (is_fstree(ref_root)) 649 + seq = atomic64_read(&fs_info->tree_mod_seq); 652 650 delayed_refs = &trans->transaction->delayed_refs; 653 651 654 652 /* first set the basic ref node struct up */ ··· 661 655 ref->action = action; 662 656 ref->is_head = 0; 663 657 ref->in_tree = 1; 664 - 665 - if (need_ref_seq(for_cow, ref_root)) 666 - seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); 658 + ref->no_quota = no_quota; 667 659 ref->seq = seq; 668 660 669 661 full_ref = btrfs_delayed_node_to_tree_ref(ref); ··· 701 697 struct btrfs_delayed_ref_head *head_ref, 702 698 struct btrfs_delayed_ref_node *ref, u64 bytenr, 703 699 u64 num_bytes, u64 parent, u64 ref_root, u64 owner, 704 - u64 offset, int action, int for_cow) 700 + u64 offset, int action, int no_quota) 705 701 { 706 702 struct btrfs_delayed_ref_node *existing; 707 703 struct btrfs_delayed_data_ref *full_ref; ··· 713 709 714 710 delayed_refs = &trans->transaction->delayed_refs; 715 711 712 + if (is_fstree(ref_root)) 713 + seq = atomic64_read(&fs_info->tree_mod_seq); 714 + 716 715 /* first set the basic ref node struct up */ 717 716 atomic_set(&ref->refs, 1); 718 717 ref->bytenr = bytenr; ··· 724 717 ref->action = action; 725 718 ref->is_head = 0; 726 719 
ref->in_tree = 1; 727 - 728 - if (need_ref_seq(for_cow, ref_root)) 729 - seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); 720 + ref->no_quota = no_quota; 730 721 ref->seq = seq; 731 722 732 723 full_ref = btrfs_delayed_node_to_data_ref(ref); ··· 767 762 u64 bytenr, u64 num_bytes, u64 parent, 768 763 u64 ref_root, int level, int action, 769 764 struct btrfs_delayed_extent_op *extent_op, 770 - int for_cow) 765 + int no_quota) 771 766 { 772 767 struct btrfs_delayed_tree_ref *ref; 773 768 struct btrfs_delayed_ref_head *head_ref; 774 769 struct btrfs_delayed_ref_root *delayed_refs; 770 + 771 + if (!is_fstree(ref_root) || !fs_info->quota_enabled) 772 + no_quota = 0; 775 773 776 774 BUG_ON(extent_op && extent_op->is_data); 777 775 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); ··· 801 793 802 794 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, 803 795 num_bytes, parent, ref_root, level, action, 804 - for_cow); 796 + no_quota); 805 797 spin_unlock(&delayed_refs->lock); 806 - if (need_ref_seq(for_cow, ref_root)) 807 - btrfs_qgroup_record_ref(trans, &ref->node, extent_op); 808 798 809 799 return 0; 810 800 } ··· 816 810 u64 parent, u64 ref_root, 817 811 u64 owner, u64 offset, int action, 818 812 struct btrfs_delayed_extent_op *extent_op, 819 - int for_cow) 813 + int no_quota) 820 814 { 821 815 struct btrfs_delayed_data_ref *ref; 822 816 struct btrfs_delayed_ref_head *head_ref; 823 817 struct btrfs_delayed_ref_root *delayed_refs; 818 + 819 + if (!is_fstree(ref_root) || !fs_info->quota_enabled) 820 + no_quota = 0; 824 821 825 822 BUG_ON(extent_op && !extent_op->is_data); 826 823 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); ··· 850 841 851 842 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, 852 843 num_bytes, parent, ref_root, owner, offset, 853 - action, for_cow); 844 + action, no_quota); 854 845 spin_unlock(&delayed_refs->lock); 855 - if (need_ref_seq(for_cow, ref_root)) 856 - 
btrfs_qgroup_record_ref(trans, &ref->node, extent_op); 857 846 858 847 return 0; 859 848 }
+3 -21
fs/btrfs/delayed-ref.h
··· 52 52 53 53 unsigned int action:8; 54 54 unsigned int type:8; 55 + unsigned int no_quota:1; 55 56 /* is this node still in the rbtree? */ 56 57 unsigned int is_head:1; 57 58 unsigned int in_tree:1; ··· 197 196 u64 bytenr, u64 num_bytes, u64 parent, 198 197 u64 ref_root, int level, int action, 199 198 struct btrfs_delayed_extent_op *extent_op, 200 - int for_cow); 199 + int no_quota); 201 200 int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 202 201 struct btrfs_trans_handle *trans, 203 202 u64 bytenr, u64 num_bytes, 204 203 u64 parent, u64 ref_root, 205 204 u64 owner, u64 offset, int action, 206 205 struct btrfs_delayed_extent_op *extent_op, 207 - int for_cow); 206 + int no_quota); 208 207 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 209 208 struct btrfs_trans_handle *trans, 210 209 u64 bytenr, u64 num_bytes, ··· 230 229 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, 231 230 struct btrfs_delayed_ref_root *delayed_refs, 232 231 u64 seq); 233 - 234 - /* 235 - * delayed refs with a ref_seq > 0 must be held back during backref walking. 236 - * this only applies to items in one of the fs-trees. for_cow items never need 237 - * to be held back, so they won't get a ref_seq number. 238 - */ 239 - static inline int need_ref_seq(int for_cow, u64 rootid) 240 - { 241 - if (for_cow) 242 - return 0; 243 - 244 - if (rootid == BTRFS_FS_TREE_OBJECTID) 245 - return 1; 246 - 247 - if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) 248 - return 1; 249 - 250 - return 0; 251 - } 252 232 253 233 /* 254 234 * a node might live in a head or a regular ref, this lets you
+4
fs/btrfs/disk-io.c
··· 49 49 #include "dev-replace.h" 50 50 #include "raid56.h" 51 51 #include "sysfs.h" 52 + #include "qgroup.h" 52 53 53 54 #ifdef CONFIG_X86 54 55 #include <asm/cpufeature.h> ··· 2220 2219 spin_lock_init(&fs_info->free_chunk_lock); 2221 2220 spin_lock_init(&fs_info->tree_mod_seq_lock); 2222 2221 spin_lock_init(&fs_info->super_lock); 2222 + spin_lock_init(&fs_info->qgroup_op_lock); 2223 2223 spin_lock_init(&fs_info->buffer_lock); 2224 2224 rwlock_init(&fs_info->tree_mod_log_lock); 2225 2225 mutex_init(&fs_info->reloc_mutex); ··· 2246 2244 atomic_set(&fs_info->async_submit_draining, 0); 2247 2245 atomic_set(&fs_info->nr_async_bios, 0); 2248 2246 atomic_set(&fs_info->defrag_running, 0); 2247 + atomic_set(&fs_info->qgroup_op_seq, 0); 2249 2248 atomic64_set(&fs_info->tree_mod_seq, 0); 2250 2249 fs_info->sb = sb; 2251 2250 fs_info->max_inline = 8192 * 1024; ··· 2356 2353 spin_lock_init(&fs_info->qgroup_lock); 2357 2354 mutex_init(&fs_info->qgroup_ioctl_lock); 2358 2355 fs_info->qgroup_tree = RB_ROOT; 2356 + fs_info->qgroup_op_tree = RB_ROOT; 2359 2357 INIT_LIST_HEAD(&fs_info->dirty_qgroups); 2360 2358 fs_info->qgroup_seq = 1; 2361 2359 fs_info->quota_enabled = 0;
+130 -76
fs/btrfs/extent-tree.c
··· 35 35 #include "free-space-cache.h" 36 36 #include "math.h" 37 37 #include "sysfs.h" 38 + #include "qgroup.h" 38 39 39 40 #undef SCRAMBLE_DELAYED_REFS 40 41 ··· 81 80 u64 bytenr, u64 num_bytes, u64 parent, 82 81 u64 root_objectid, u64 owner_objectid, 83 82 u64 owner_offset, int refs_to_drop, 84 - struct btrfs_delayed_extent_op *extra_op); 83 + struct btrfs_delayed_extent_op *extra_op, 84 + int no_quota); 85 85 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 86 86 struct extent_buffer *leaf, 87 87 struct btrfs_extent_item *ei); ··· 95 93 struct btrfs_root *root, 96 94 u64 parent, u64 root_objectid, 97 95 u64 flags, struct btrfs_disk_key *key, 98 - int level, struct btrfs_key *ins); 96 + int level, struct btrfs_key *ins, 97 + int no_quota); 99 98 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 100 99 struct btrfs_root *extent_root, u64 flags, 101 100 int force); ··· 1273 1270 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1274 1271 struct btrfs_root *root, 1275 1272 struct btrfs_path *path, 1276 - int refs_to_drop) 1273 + int refs_to_drop, int *last_ref) 1277 1274 { 1278 1275 struct btrfs_key key; 1279 1276 struct btrfs_extent_data_ref *ref1 = NULL; ··· 1309 1306 1310 1307 if (num_refs == 0) { 1311 1308 ret = btrfs_del_item(trans, root, path); 1309 + *last_ref = 1; 1312 1310 } else { 1313 1311 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1314 1312 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); ··· 1767 1763 struct btrfs_path *path, 1768 1764 struct btrfs_extent_inline_ref *iref, 1769 1765 int refs_to_mod, 1770 - struct btrfs_delayed_extent_op *extent_op) 1766 + struct btrfs_delayed_extent_op *extent_op, 1767 + int *last_ref) 1771 1768 { 1772 1769 struct extent_buffer *leaf; 1773 1770 struct btrfs_extent_item *ei; ··· 1812 1807 else 1813 1808 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1814 1809 } else { 1810 + *last_ref = 1; 1815 1811 size = btrfs_extent_inline_ref_size(type); 
1816 1812 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1817 1813 ptr = (unsigned long)iref; ··· 1844 1838 if (ret == 0) { 1845 1839 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1846 1840 update_inline_extent_backref(root, path, iref, 1847 - refs_to_add, extent_op); 1841 + refs_to_add, extent_op, NULL); 1848 1842 } else if (ret == -ENOENT) { 1849 1843 setup_inline_extent_backref(root, path, iref, parent, 1850 1844 root_objectid, owner, offset, ··· 1877 1871 struct btrfs_root *root, 1878 1872 struct btrfs_path *path, 1879 1873 struct btrfs_extent_inline_ref *iref, 1880 - int refs_to_drop, int is_data) 1874 + int refs_to_drop, int is_data, int *last_ref) 1881 1875 { 1882 1876 int ret = 0; 1883 1877 1884 1878 BUG_ON(!is_data && refs_to_drop != 1); 1885 1879 if (iref) { 1886 1880 update_inline_extent_backref(root, path, iref, 1887 - -refs_to_drop, NULL); 1881 + -refs_to_drop, NULL, last_ref); 1888 1882 } else if (is_data) { 1889 - ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1883 + ret = remove_extent_data_ref(trans, root, path, refs_to_drop, 1884 + last_ref); 1890 1885 } else { 1886 + *last_ref = 1; 1891 1887 ret = btrfs_del_item(trans, root, path); 1892 1888 } 1893 1889 return ret; ··· 1953 1945 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1954 1946 struct btrfs_root *root, 1955 1947 u64 bytenr, u64 num_bytes, u64 parent, 1956 - u64 root_objectid, u64 owner, u64 offset, int for_cow) 1948 + u64 root_objectid, u64 owner, u64 offset, 1949 + int no_quota) 1957 1950 { 1958 1951 int ret; 1959 1952 struct btrfs_fs_info *fs_info = root->fs_info; ··· 1966 1957 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 1967 1958 num_bytes, 1968 1959 parent, root_objectid, (int)owner, 1969 - BTRFS_ADD_DELAYED_REF, NULL, for_cow); 1960 + BTRFS_ADD_DELAYED_REF, NULL, no_quota); 1970 1961 } else { 1971 1962 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 1972 1963 num_bytes, 1973 1964 parent, root_objectid, owner, offset, 1974 - 
BTRFS_ADD_DELAYED_REF, NULL, for_cow); 1965 + BTRFS_ADD_DELAYED_REF, NULL, no_quota); 1975 1966 } 1976 1967 return ret; 1977 1968 } ··· 1981 1972 u64 bytenr, u64 num_bytes, 1982 1973 u64 parent, u64 root_objectid, 1983 1974 u64 owner, u64 offset, int refs_to_add, 1975 + int no_quota, 1984 1976 struct btrfs_delayed_extent_op *extent_op) 1985 1977 { 1978 + struct btrfs_fs_info *fs_info = root->fs_info; 1986 1979 struct btrfs_path *path; 1987 1980 struct extent_buffer *leaf; 1988 1981 struct btrfs_extent_item *item; 1982 + struct btrfs_key key; 1989 1983 u64 refs; 1990 1984 int ret; 1985 + enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL; 1991 1986 1992 1987 path = btrfs_alloc_path(); 1993 1988 if (!path) 1994 1989 return -ENOMEM; 1995 1990 1991 + if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) 1992 + no_quota = 1; 1993 + 1996 1994 path->reada = 1; 1997 1995 path->leave_spinning = 1; 1998 1996 /* this will setup the path even if it fails to insert the back ref */ 1999 - ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 2000 - path, bytenr, num_bytes, parent, 1997 + ret = insert_inline_extent_backref(trans, fs_info->extent_root, path, 1998 + bytenr, num_bytes, parent, 2001 1999 root_objectid, owner, offset, 2002 2000 refs_to_add, extent_op); 2003 - if (ret != -EAGAIN) 2001 + if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota)) 2004 2002 goto out; 2003 + /* 2004 + * Ok we were able to insert an inline extent and it appears to be a new 2005 + * reference, deal with the qgroup accounting. 
2006 + */ 2007 + if (!ret && !no_quota) { 2008 + ASSERT(root->fs_info->quota_enabled); 2009 + leaf = path->nodes[0]; 2010 + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2011 + item = btrfs_item_ptr(leaf, path->slots[0], 2012 + struct btrfs_extent_item); 2013 + if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add) 2014 + type = BTRFS_QGROUP_OPER_ADD_SHARED; 2015 + btrfs_release_path(path); 2005 2016 2017 + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, 2018 + bytenr, num_bytes, type, 0); 2019 + goto out; 2020 + } 2021 + 2022 + /* 2023 + * Ok we had -EAGAIN which means we didn't have space to insert and 2024 + * inline extent ref, so just update the reference count and add a 2025 + * normal backref. 2026 + */ 2006 2027 leaf = path->nodes[0]; 2028 + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2007 2029 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2008 2030 refs = btrfs_extent_refs(leaf, item); 2031 + if (refs) 2032 + type = BTRFS_QGROUP_OPER_ADD_SHARED; 2009 2033 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 2010 2034 if (extent_op) 2011 2035 __run_delayed_extent_op(extent_op, leaf, item); ··· 2046 2004 btrfs_mark_buffer_dirty(leaf); 2047 2005 btrfs_release_path(path); 2048 2006 2007 + if (!no_quota) { 2008 + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, 2009 + bytenr, num_bytes, type, 0); 2010 + if (ret) 2011 + goto out; 2012 + } 2013 + 2049 2014 path->reada = 1; 2050 2015 path->leave_spinning = 1; 2051 - 2052 2016 /* now insert the actual backref */ 2053 2017 ret = insert_extent_backref(trans, root->fs_info->extent_root, 2054 2018 path, bytenr, parent, root_objectid, ··· 2088 2040 2089 2041 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2090 2042 parent = ref->parent; 2091 - else 2092 - ref_root = ref->root; 2043 + ref_root = ref->root; 2093 2044 2094 2045 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2095 2046 if (extent_op) ··· 2102 2055 node->num_bytes, parent, 2103 2056 
ref_root, ref->objectid, 2104 2057 ref->offset, node->ref_mod, 2105 - extent_op); 2058 + node->no_quota, extent_op); 2106 2059 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2107 2060 ret = __btrfs_free_extent(trans, root, node->bytenr, 2108 2061 node->num_bytes, parent, 2109 2062 ref_root, ref->objectid, 2110 2063 ref->offset, node->ref_mod, 2111 - extent_op); 2064 + extent_op, node->no_quota); 2112 2065 } else { 2113 2066 BUG(); 2114 2067 } ··· 2245 2198 2246 2199 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2247 2200 parent = ref->parent; 2248 - else 2249 - ref_root = ref->root; 2201 + ref_root = ref->root; 2250 2202 2251 2203 ins.objectid = node->bytenr; 2252 2204 if (skinny_metadata) { ··· 2263 2217 parent, ref_root, 2264 2218 extent_op->flags_to_set, 2265 2219 &extent_op->key, 2266 - ref->level, &ins); 2220 + ref->level, &ins, 2221 + node->no_quota); 2267 2222 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2268 2223 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2269 2224 node->num_bytes, parent, ref_root, 2270 - ref->level, 0, 1, extent_op); 2225 + ref->level, 0, 1, node->no_quota, 2226 + extent_op); 2271 2227 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2272 2228 ret = __btrfs_free_extent(trans, root, node->bytenr, 2273 2229 node->num_bytes, parent, ref_root, 2274 - ref->level, 0, 1, extent_op); 2230 + ref->level, 0, 1, extent_op, 2231 + node->no_quota); 2275 2232 } else { 2276 2233 BUG(); 2277 2234 } ··· 2622 2573 } 2623 2574 #endif 2624 2575 2625 - int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, 2626 - struct btrfs_fs_info *fs_info) 2627 - { 2628 - struct qgroup_update *qgroup_update; 2629 - int ret = 0; 2630 - 2631 - if (list_empty(&trans->qgroup_ref_list) != 2632 - !trans->delayed_ref_elem.seq) { 2633 - /* list without seq or seq without list */ 2634 - btrfs_err(fs_info, 2635 - "qgroup accounting update error, list is%s empty, seq is %#x.%x", 2636 - list_empty(&trans->qgroup_ref_list) ? 
"" : " not", 2637 - (u32)(trans->delayed_ref_elem.seq >> 32), 2638 - (u32)trans->delayed_ref_elem.seq); 2639 - BUG(); 2640 - } 2641 - 2642 - if (!trans->delayed_ref_elem.seq) 2643 - return 0; 2644 - 2645 - while (!list_empty(&trans->qgroup_ref_list)) { 2646 - qgroup_update = list_first_entry(&trans->qgroup_ref_list, 2647 - struct qgroup_update, list); 2648 - list_del(&qgroup_update->list); 2649 - if (!ret) 2650 - ret = btrfs_qgroup_account_ref( 2651 - trans, fs_info, qgroup_update->node, 2652 - qgroup_update->extent_op); 2653 - kfree(qgroup_update); 2654 - } 2655 - 2656 - btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); 2657 - 2658 - return ret; 2659 - } 2660 - 2661 2576 static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2662 2577 { 2663 2578 u64 num_bytes; ··· 2710 2697 if (root == root->fs_info->extent_root) 2711 2698 root = root->fs_info->tree_root; 2712 2699 2713 - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 2714 - 2715 2700 delayed_refs = &trans->transaction->delayed_refs; 2716 2701 if (count == 0) { 2717 2702 count = atomic_read(&delayed_refs->num_entries) * 2; ··· 2768 2757 goto again; 2769 2758 } 2770 2759 out: 2760 + ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info); 2761 + if (ret) 2762 + return ret; 2771 2763 assert_qgroups_uptodate(trans); 2772 2764 return 0; 2773 2765 } ··· 2977 2963 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2978 2964 struct btrfs_root *root, 2979 2965 struct extent_buffer *buf, 2980 - int full_backref, int inc, int for_cow) 2966 + int full_backref, int inc, int no_quota) 2981 2967 { 2982 2968 u64 bytenr; 2983 2969 u64 num_bytes; ··· 3027 3013 key.offset -= btrfs_file_extent_offset(buf, fi); 3028 3014 ret = process_func(trans, root, bytenr, num_bytes, 3029 3015 parent, ref_root, key.objectid, 3030 - key.offset, for_cow); 3016 + key.offset, no_quota); 3031 3017 if (ret) 3032 3018 goto fail; 3033 3019 } else { ··· 3035 3021 num_bytes = btrfs_level_size(root, level 
- 1); 3036 3022 ret = process_func(trans, root, bytenr, num_bytes, 3037 3023 parent, ref_root, level - 1, 0, 3038 - for_cow); 3024 + no_quota); 3039 3025 if (ret) 3040 3026 goto fail; 3041 3027 } ··· 3046 3032 } 3047 3033 3048 3034 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3049 - struct extent_buffer *buf, int full_backref, int for_cow) 3035 + struct extent_buffer *buf, int full_backref, int no_quota) 3050 3036 { 3051 - return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); 3037 + return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); 3052 3038 } 3053 3039 3054 3040 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3055 - struct extent_buffer *buf, int full_backref, int for_cow) 3041 + struct extent_buffer *buf, int full_backref, int no_quota) 3056 3042 { 3057 - return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); 3043 + return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); 3058 3044 } 3059 3045 3060 3046 static int write_one_cache_group(struct btrfs_trans_handle *trans, ··· 5737 5723 u64 bytenr, u64 num_bytes, u64 parent, 5738 5724 u64 root_objectid, u64 owner_objectid, 5739 5725 u64 owner_offset, int refs_to_drop, 5740 - struct btrfs_delayed_extent_op *extent_op) 5726 + struct btrfs_delayed_extent_op *extent_op, 5727 + int no_quota) 5741 5728 { 5742 5729 struct btrfs_key key; 5743 5730 struct btrfs_path *path; ··· 5754 5739 int num_to_del = 1; 5755 5740 u32 item_size; 5756 5741 u64 refs; 5742 + int last_ref = 0; 5743 + enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL; 5757 5744 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 5758 5745 SKINNY_METADATA); 5746 + 5747 + if (!info->quota_enabled || !is_fstree(root_objectid)) 5748 + no_quota = 1; 5759 5749 5760 5750 path = btrfs_alloc_path(); 5761 5751 if (!path) ··· 5809 5789 BUG_ON(iref); 5810 5790 ret = remove_extent_backref(trans, extent_root, path, 5811 5791 NULL, 
refs_to_drop, 5812 - is_data); 5792 + is_data, &last_ref); 5813 5793 if (ret) { 5814 5794 btrfs_abort_transaction(trans, extent_root, ret); 5815 5795 goto out; ··· 5936 5916 refs -= refs_to_drop; 5937 5917 5938 5918 if (refs > 0) { 5919 + type = BTRFS_QGROUP_OPER_SUB_SHARED; 5939 5920 if (extent_op) 5940 5921 __run_delayed_extent_op(extent_op, leaf, ei); 5941 5922 /* ··· 5952 5931 if (found_extent) { 5953 5932 ret = remove_extent_backref(trans, extent_root, path, 5954 5933 iref, refs_to_drop, 5955 - is_data); 5934 + is_data, &last_ref); 5956 5935 if (ret) { 5957 5936 btrfs_abort_transaction(trans, extent_root, ret); 5958 5937 goto out; ··· 5973 5952 } 5974 5953 } 5975 5954 5955 + last_ref = 1; 5976 5956 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 5977 5957 num_to_del); 5978 5958 if (ret) { ··· 5995 5973 btrfs_abort_transaction(trans, extent_root, ret); 5996 5974 goto out; 5997 5975 } 5976 + } 5977 + btrfs_release_path(path); 5978 + 5979 + /* Deal with the quota accounting */ 5980 + if (!ret && last_ref && !no_quota) { 5981 + int mod_seq = 0; 5982 + 5983 + if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && 5984 + type == BTRFS_QGROUP_OPER_SUB_SHARED) 5985 + mod_seq = 1; 5986 + 5987 + ret = btrfs_qgroup_record_ref(trans, info, root_objectid, 5988 + bytenr, num_bytes, type, 5989 + mod_seq); 5998 5990 } 5999 5991 out: 6000 5992 btrfs_free_path(path); ··· 6146 6110 /* Can return -ENOMEM */ 6147 6111 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, 6148 6112 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 6149 - u64 owner, u64 offset, int for_cow) 6113 + u64 owner, u64 offset, int no_quota) 6150 6114 { 6151 6115 int ret; 6152 6116 struct btrfs_fs_info *fs_info = root->fs_info; ··· 6166 6130 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 6167 6131 num_bytes, 6168 6132 parent, root_objectid, (int)owner, 6169 - BTRFS_DROP_DELAYED_REF, NULL, for_cow); 6133 + BTRFS_DROP_DELAYED_REF, NULL, no_quota); 6170 
6134 } else { 6171 6135 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 6172 6136 num_bytes, 6173 6137 parent, root_objectid, owner, 6174 6138 offset, BTRFS_DROP_DELAYED_REF, 6175 - NULL, for_cow); 6139 + NULL, no_quota); 6176 6140 } 6177 6141 return ret; 6178 6142 } ··· 6878 6842 btrfs_mark_buffer_dirty(path->nodes[0]); 6879 6843 btrfs_free_path(path); 6880 6844 6845 + /* Always set parent to 0 here since its exclusive anyway. */ 6846 + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, 6847 + ins->objectid, ins->offset, 6848 + BTRFS_QGROUP_OPER_ADD_EXCL, 0); 6849 + if (ret) 6850 + return ret; 6851 + 6881 6852 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6882 6853 if (ret) { /* -ENOENT, logic error */ 6883 6854 btrfs_err(fs_info, "update block group failed for %llu %llu", ··· 6899 6856 struct btrfs_root *root, 6900 6857 u64 parent, u64 root_objectid, 6901 6858 u64 flags, struct btrfs_disk_key *key, 6902 - int level, struct btrfs_key *ins) 6859 + int level, struct btrfs_key *ins, 6860 + int no_quota) 6903 6861 { 6904 6862 int ret; 6905 6863 struct btrfs_fs_info *fs_info = root->fs_info; ··· 6910 6866 struct btrfs_path *path; 6911 6867 struct extent_buffer *leaf; 6912 6868 u32 size = sizeof(*extent_item) + sizeof(*iref); 6869 + u64 num_bytes = ins->offset; 6913 6870 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 6914 6871 SKINNY_METADATA); 6915 6872 ··· 6944 6899 6945 6900 if (skinny_metadata) { 6946 6901 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 6902 + num_bytes = root->leafsize; 6947 6903 } else { 6948 6904 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 6949 6905 btrfs_set_tree_block_key(leaf, block_info, key); ··· 6965 6919 6966 6920 btrfs_mark_buffer_dirty(leaf); 6967 6921 btrfs_free_path(path); 6922 + 6923 + if (!no_quota) { 6924 + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, 6925 + ins->objectid, num_bytes, 6926 + BTRFS_QGROUP_OPER_ADD_EXCL, 0); 6927 + if (ret) 
6928 + return ret; 6929 + } 6968 6930 6969 6931 ret = update_block_group(root, ins->objectid, root->leafsize, 1); 6970 6932 if (ret) { /* -ENOENT, logic error */
+3 -2
fs/btrfs/file.c
··· 40 40 #include "tree-log.h" 41 41 #include "locking.h" 42 42 #include "volumes.h" 43 + #include "qgroup.h" 43 44 44 45 static struct kmem_cache *btrfs_inode_defrag_cachep; 45 46 /* ··· 850 849 disk_bytenr, num_bytes, 0, 851 850 root->root_key.objectid, 852 851 new_key.objectid, 853 - start - extent_offset, 0); 852 + start - extent_offset, 1); 854 853 BUG_ON(ret); /* -ENOMEM */ 855 854 } 856 855 key.offset = start; ··· 1207 1206 1208 1207 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 1209 1208 root->root_key.objectid, 1210 - ino, orig_offset, 0); 1209 + ino, orig_offset, 1); 1211 1210 BUG_ON(ret); /* -ENOMEM */ 1212 1211 1213 1212 if (split == start) {
+62 -1
fs/btrfs/ioctl.c
··· 58 58 #include "dev-replace.h" 59 59 #include "props.h" 60 60 #include "sysfs.h" 61 + #include "qgroup.h" 61 62 62 63 #ifdef CONFIG_64BIT 63 64 /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI ··· 2942 2941 return ret; 2943 2942 } 2944 2943 2944 + /* Helper to check and see if this root currently has a ref on the given disk 2945 + * bytenr. If it does then we need to update the quota for this root. This 2946 + * doesn't do anything if quotas aren't enabled. 2947 + */ 2948 + static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2949 + u64 disko) 2950 + { 2951 + struct seq_list tree_mod_seq_elem = {}; 2952 + struct ulist *roots; 2953 + struct ulist_iterator uiter; 2954 + struct ulist_node *root_node = NULL; 2955 + int ret; 2956 + 2957 + if (!root->fs_info->quota_enabled) 2958 + return 1; 2959 + 2960 + btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); 2961 + ret = btrfs_find_all_roots(trans, root->fs_info, disko, 2962 + tree_mod_seq_elem.seq, &roots); 2963 + if (ret < 0) 2964 + goto out; 2965 + ret = 0; 2966 + ULIST_ITER_INIT(&uiter); 2967 + while ((root_node = ulist_next(roots, &uiter))) { 2968 + if (root_node->val == root->objectid) { 2969 + ret = 1; 2970 + break; 2971 + } 2972 + } 2973 + ulist_free(roots); 2974 + out: 2975 + btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); 2976 + return ret; 2977 + } 2978 + 2945 2979 /** 2946 2980 * btrfs_clone() - clone a range from inode file to another 2947 2981 * ··· 3000 2964 u32 nritems; 3001 2965 int slot; 3002 2966 int ret; 2967 + int no_quota; 3003 2968 u64 len = olen_aligned; 2969 + u64 last_disko = 0; 3004 2970 3005 2971 ret = -ENOMEM; 3006 2972 buf = vmalloc(btrfs_level_size(root, 0)); ··· 3034 2996 3035 2997 nritems = btrfs_header_nritems(path->nodes[0]); 3036 2998 process_slot: 2999 + no_quota = 1; 3037 3000 if (path->slots[0] >= nritems) { 3038 3001 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 3039 3002 if (ret < 0) ··· 3167 3128 datao); 3168 3129 
btrfs_set_file_extent_num_bytes(leaf, extent, 3169 3130 datal); 3131 + 3132 + /* 3133 + * We need to look up the roots that point at 3134 + * this bytenr and see if the new root does. If 3135 + * it does not we need to make sure we update 3136 + * quotas appropriately. 3137 + */ 3138 + if (disko && root != BTRFS_I(src)->root && 3139 + disko != last_disko) { 3140 + no_quota = check_ref(trans, root, 3141 + disko); 3142 + if (no_quota < 0) { 3143 + btrfs_abort_transaction(trans, 3144 + root, 3145 + ret); 3146 + btrfs_end_transaction(trans, 3147 + root); 3148 + ret = no_quota; 3149 + goto out; 3150 + } 3151 + } 3152 + 3170 3153 if (disko) { 3171 3154 inode_add_bytes(inode, datal); 3172 3155 ret = btrfs_inc_extent_ref(trans, root, ··· 3196 3135 root->root_key.objectid, 3197 3136 btrfs_ino(inode), 3198 3137 new_key.offset - datao, 3199 - 0); 3138 + no_quota); 3200 3139 if (ret) { 3201 3140 btrfs_abort_transaction(trans, 3202 3141 root,
+683 -254
fs/btrfs/qgroup.c
··· 32 32 #include "ulist.h" 33 33 #include "backref.h" 34 34 #include "extent_io.h" 35 + #include "qgroup.h" 35 36 36 37 /* TODO XXX FIXME 37 38 * - subvol delete -> delete when ref goes to 0? delete limits also? ··· 85 84 /* 86 85 * temp variables for accounting operations 87 86 */ 88 - u64 tag; 89 - u64 refcnt; 87 + u64 old_refcnt; 88 + u64 new_refcnt; 90 89 }; 91 90 92 91 /* ··· 98 97 struct btrfs_qgroup *group; 99 98 struct btrfs_qgroup *member; 100 99 }; 100 + 101 + #define ptr_to_u64(x) ((u64)(uintptr_t)x) 102 + #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) 101 103 102 104 static int 103 105 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, ··· 1178 1174 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1179 1175 return ret; 1180 1176 } 1177 + static int comp_oper(struct btrfs_qgroup_operation *oper1, 1178 + struct btrfs_qgroup_operation *oper2) 1179 + { 1180 + if (oper1->bytenr < oper2->bytenr) 1181 + return -1; 1182 + if (oper1->bytenr > oper2->bytenr) 1183 + return 1; 1184 + if (oper1->seq < oper2->seq) 1185 + return -1; 1186 + if (oper1->seq > oper2->seq) 1187 + return -1; 1188 + if (oper1->ref_root < oper2->ref_root) 1189 + return -1; 1190 + if (oper1->ref_root > oper2->ref_root) 1191 + return 1; 1192 + if (oper1->type < oper2->type) 1193 + return -1; 1194 + if (oper1->type > oper2->type) 1195 + return 1; 1196 + return 0; 1197 + } 1198 + 1199 + static int insert_qgroup_oper(struct btrfs_fs_info *fs_info, 1200 + struct btrfs_qgroup_operation *oper) 1201 + { 1202 + struct rb_node **p; 1203 + struct rb_node *parent = NULL; 1204 + struct btrfs_qgroup_operation *cur; 1205 + int cmp; 1206 + 1207 + spin_lock(&fs_info->qgroup_op_lock); 1208 + p = &fs_info->qgroup_op_tree.rb_node; 1209 + while (*p) { 1210 + parent = *p; 1211 + cur = rb_entry(parent, struct btrfs_qgroup_operation, n); 1212 + cmp = comp_oper(cur, oper); 1213 + if (cmp < 0) { 1214 + p = &(*p)->rb_right; 1215 + } else if (cmp) { 1216 + p = &(*p)->rb_left; 1217 + } else 
{ 1218 + spin_unlock(&fs_info->qgroup_op_lock); 1219 + return -EEXIST; 1220 + } 1221 + } 1222 + rb_link_node(&oper->n, parent, p); 1223 + rb_insert_color(&oper->n, &fs_info->qgroup_op_tree); 1224 + spin_unlock(&fs_info->qgroup_op_lock); 1225 + return 0; 1226 + } 1181 1227 1182 1228 /* 1183 - * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts 1184 - * the modification into a list that's later used by btrfs_end_transaction to 1185 - * pass the recorded modifications on to btrfs_qgroup_account_ref. 1229 + * Record a quota operation for processing later on. 1230 + * @trans: the transaction we are adding the delayed op to. 1231 + * @fs_info: the fs_info for this fs. 1232 + * @ref_root: the root of the reference we are acting on, 1233 + * @bytenr: the bytenr we are acting on. 1234 + * @num_bytes: the number of bytes in the reference. 1235 + * @type: the type of operation this is. 1236 + * @mod_seq: do we need to get a sequence number for looking up roots. 1237 + * 1238 + * We just add it to our trans qgroup_ref_list and carry on and process these 1239 + * operations in order at some later point. If the reference root isn't a fs 1240 + * root then we don't bother with doing anything. 1241 + * 1242 + * MUST BE HOLDING THE REF LOCK. 
1186 1243 */ 1187 1244 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 1188 - struct btrfs_delayed_ref_node *node, 1189 - struct btrfs_delayed_extent_op *extent_op) 1245 + struct btrfs_fs_info *fs_info, u64 ref_root, 1246 + u64 bytenr, u64 num_bytes, 1247 + enum btrfs_qgroup_operation_type type, int mod_seq) 1190 1248 { 1191 - struct qgroup_update *u; 1249 + struct btrfs_qgroup_operation *oper; 1250 + int ret; 1192 1251 1193 - BUG_ON(!trans->delayed_ref_elem.seq); 1194 - u = kmalloc(sizeof(*u), GFP_NOFS); 1195 - if (!u) 1252 + if (!is_fstree(ref_root) || !fs_info->quota_enabled) 1253 + return 0; 1254 + 1255 + oper = kmalloc(sizeof(*oper), GFP_NOFS); 1256 + if (!oper) 1196 1257 return -ENOMEM; 1197 1258 1198 - u->node = node; 1199 - u->extent_op = extent_op; 1200 - list_add_tail(&u->list, &trans->qgroup_ref_list); 1259 + oper->ref_root = ref_root; 1260 + oper->bytenr = bytenr; 1261 + oper->num_bytes = num_bytes; 1262 + oper->type = type; 1263 + oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1264 + INIT_LIST_HEAD(&oper->elem.list); 1265 + oper->elem.seq = 0; 1266 + ret = insert_qgroup_oper(fs_info, oper); 1267 + if (ret) { 1268 + /* Shouldn't happen so have an assert for developers */ 1269 + ASSERT(0); 1270 + kfree(oper); 1271 + return ret; 1272 + } 1273 + list_add_tail(&oper->list, &trans->qgroup_ref_list); 1274 + 1275 + if (mod_seq) 1276 + btrfs_get_tree_mod_seq(fs_info, &oper->elem); 1201 1277 1202 1278 return 0; 1203 1279 } 1204 1280 1205 - static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, 1206 - struct ulist *roots, struct ulist *tmp, 1207 - u64 seq) 1281 + /* 1282 + * The easy accounting, if we are adding/removing the only ref for an extent 1283 + * then this qgroup and all of the parent qgroups get their refrence and 1284 + * exclusive counts adjusted. 
1285 + */ 1286 + static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1287 + struct btrfs_qgroup_operation *oper) 1288 + { 1289 + struct btrfs_qgroup *qgroup; 1290 + struct ulist *tmp; 1291 + struct btrfs_qgroup_list *glist; 1292 + struct ulist_node *unode; 1293 + struct ulist_iterator uiter; 1294 + int sign = 0; 1295 + int ret = 0; 1296 + 1297 + tmp = ulist_alloc(GFP_NOFS); 1298 + if (!tmp) 1299 + return -ENOMEM; 1300 + 1301 + spin_lock(&fs_info->qgroup_lock); 1302 + if (!fs_info->quota_root) 1303 + goto out; 1304 + qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1305 + if (!qgroup) 1306 + goto out; 1307 + switch (oper->type) { 1308 + case BTRFS_QGROUP_OPER_ADD_EXCL: 1309 + sign = 1; 1310 + break; 1311 + case BTRFS_QGROUP_OPER_SUB_EXCL: 1312 + sign = -1; 1313 + break; 1314 + default: 1315 + ASSERT(0); 1316 + } 1317 + qgroup->rfer += sign * oper->num_bytes; 1318 + qgroup->rfer_cmpr += sign * oper->num_bytes; 1319 + 1320 + WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); 1321 + qgroup->excl += sign * oper->num_bytes; 1322 + qgroup->excl_cmpr += sign * oper->num_bytes; 1323 + 1324 + qgroup_dirty(fs_info, qgroup); 1325 + 1326 + /* Get all of the parent groups that contain this qgroup */ 1327 + list_for_each_entry(glist, &qgroup->groups, next_group) { 1328 + ret = ulist_add(tmp, glist->group->qgroupid, 1329 + ptr_to_u64(glist->group), GFP_ATOMIC); 1330 + if (ret < 0) 1331 + goto out; 1332 + } 1333 + 1334 + /* Iterate all of the parents and adjust their reference counts */ 1335 + ULIST_ITER_INIT(&uiter); 1336 + while ((unode = ulist_next(tmp, &uiter))) { 1337 + qgroup = u64_to_ptr(unode->aux); 1338 + qgroup->rfer += sign * oper->num_bytes; 1339 + qgroup->rfer_cmpr += sign * oper->num_bytes; 1340 + qgroup->excl += sign * oper->num_bytes; 1341 + if (sign < 0) 1342 + WARN_ON(qgroup->excl < oper->num_bytes); 1343 + qgroup->excl_cmpr += sign * oper->num_bytes; 1344 + qgroup_dirty(fs_info, qgroup); 1345 + 1346 + /* Add any parents of the parents */ 1347 + 
list_for_each_entry(glist, &qgroup->groups, next_group) { 1348 + ret = ulist_add(tmp, glist->group->qgroupid, 1349 + ptr_to_u64(glist->group), GFP_ATOMIC); 1350 + if (ret < 0) 1351 + goto out; 1352 + } 1353 + } 1354 + ret = 0; 1355 + out: 1356 + spin_unlock(&fs_info->qgroup_lock); 1357 + ulist_free(tmp); 1358 + return ret; 1359 + } 1360 + 1361 + /* 1362 + * Walk all of the roots that pointed to our bytenr and adjust their refcnts as 1363 + * properly. 1364 + */ 1365 + static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, 1366 + u64 root_to_skip, struct ulist *tmp, 1367 + struct ulist *roots, struct ulist *qgroups, 1368 + u64 seq, int *old_roots, int rescan) 1208 1369 { 1209 1370 struct ulist_node *unode; 1210 1371 struct ulist_iterator uiter; ··· 1380 1211 1381 1212 ULIST_ITER_INIT(&uiter); 1382 1213 while ((unode = ulist_next(roots, &uiter))) { 1214 + /* We don't count our current root here */ 1215 + if (unode->val == root_to_skip) 1216 + continue; 1383 1217 qg = find_qgroup_rb(fs_info, unode->val); 1384 1218 if (!qg) 1385 1219 continue; 1220 + /* 1221 + * We could have a pending removal of this same ref so we may 1222 + * not have actually found our ref root when doing 1223 + * btrfs_find_all_roots, so we need to keep track of how many 1224 + * old roots we find in case we removed ours and added a 1225 + * different one at the same time. I don't think this could 1226 + * happen in practice but that sort of thinking leads to pain 1227 + * and suffering and to the dark side. 
1228 + */ 1229 + (*old_roots)++; 1386 1230 1387 1231 ulist_reinit(tmp); 1388 - /* XXX id not needed */ 1389 - ret = ulist_add(tmp, qg->qgroupid, 1390 - (u64)(uintptr_t)qg, GFP_ATOMIC); 1232 + ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1233 + GFP_ATOMIC); 1234 + if (ret < 0) 1235 + return ret; 1236 + ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); 1391 1237 if (ret < 0) 1392 1238 return ret; 1393 1239 ULIST_ITER_INIT(&tmp_uiter); 1394 1240 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1395 1241 struct btrfs_qgroup_list *glist; 1396 1242 1397 - qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; 1398 - if (qg->refcnt < seq) 1399 - qg->refcnt = seq + 1; 1243 + qg = u64_to_ptr(tmp_unode->aux); 1244 + /* 1245 + * We use this sequence number to keep from having to 1246 + * run the whole list and 0 out the refcnt every time. 1247 + * We basically use sequnce as the known 0 count and 1248 + * then add 1 everytime we see a qgroup. This is how we 1249 + * get how many of the roots actually point up to the 1250 + * upper level qgroups in order to determine exclusive 1251 + * counts. 1252 + * 1253 + * For rescan we want to set old_refcnt to seq so our 1254 + * exclusive calculations end up correct. 
1255 + */ 1256 + if (rescan) 1257 + qg->old_refcnt = seq; 1258 + else if (qg->old_refcnt < seq) 1259 + qg->old_refcnt = seq + 1; 1400 1260 else 1401 - ++qg->refcnt; 1261 + qg->old_refcnt++; 1402 1262 1263 + if (qg->new_refcnt < seq) 1264 + qg->new_refcnt = seq + 1; 1265 + else 1266 + qg->new_refcnt++; 1403 1267 list_for_each_entry(glist, &qg->groups, next_group) { 1268 + ret = ulist_add(qgroups, glist->group->qgroupid, 1269 + ptr_to_u64(glist->group), 1270 + GFP_ATOMIC); 1271 + if (ret < 0) 1272 + return ret; 1404 1273 ret = ulist_add(tmp, glist->group->qgroupid, 1405 - (u64)(uintptr_t)glist->group, 1274 + ptr_to_u64(glist->group), 1406 1275 GFP_ATOMIC); 1407 1276 if (ret < 0) 1408 1277 return ret; 1409 1278 } 1410 1279 } 1411 1280 } 1412 - 1413 1281 return 0; 1414 1282 } 1415 1283 1416 - static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, 1417 - struct ulist *roots, struct ulist *tmp, 1418 - u64 seq, int sgn, u64 num_bytes, 1419 - struct btrfs_qgroup *qgroup) 1284 + /* 1285 + * We need to walk forward in our operation tree and account for any roots that 1286 + * were deleted after we made this operation. 1287 + */ 1288 + static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, 1289 + struct btrfs_qgroup_operation *oper, 1290 + struct ulist *tmp, 1291 + struct ulist *qgroups, u64 seq, 1292 + int *old_roots) 1420 1293 { 1421 1294 struct ulist_node *unode; 1422 1295 struct ulist_iterator uiter; 1423 1296 struct btrfs_qgroup *qg; 1424 - struct btrfs_qgroup_list *glist; 1297 + struct btrfs_qgroup_operation *tmp_oper; 1298 + struct rb_node *n; 1425 1299 int ret; 1426 1300 1427 1301 ulist_reinit(tmp); 1428 - ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1429 - if (ret < 0) 1430 - return ret; 1431 1302 1303 + /* 1304 + * We only walk forward in the tree since we're only interested in 1305 + * removals that happened _after_ our operation. 
1306 + */ 1307 + spin_lock(&fs_info->qgroup_op_lock); 1308 + n = rb_next(&oper->n); 1309 + spin_unlock(&fs_info->qgroup_op_lock); 1310 + if (!n) 1311 + return 0; 1312 + tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1313 + while (tmp_oper->bytenr == oper->bytenr) { 1314 + /* 1315 + * If it's not a removal we don't care, additions work out 1316 + * properly with our refcnt tracking. 1317 + */ 1318 + if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && 1319 + tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) 1320 + goto next; 1321 + qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); 1322 + if (!qg) 1323 + goto next; 1324 + ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1325 + GFP_ATOMIC); 1326 + if (ret) { 1327 + if (ret < 0) 1328 + return ret; 1329 + /* 1330 + * We only want to increase old_roots if this qgroup is 1331 + * not already in the list of qgroups. If it is already 1332 + * there then that means it must have been re-added or 1333 + * the delete will be discarded because we had an 1334 + * existing ref that we haven't looked up yet. In this 1335 + * case we don't want to increase old_roots. So if ret 1336 + * == 1 then we know that this is the first time we've 1337 + * seen this qgroup and we can bump the old_roots. 
1338 + */ 1339 + (*old_roots)++; 1340 + ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), 1341 + GFP_ATOMIC); 1342 + if (ret < 0) 1343 + return ret; 1344 + } 1345 + next: 1346 + spin_lock(&fs_info->qgroup_op_lock); 1347 + n = rb_next(&tmp_oper->n); 1348 + spin_unlock(&fs_info->qgroup_op_lock); 1349 + if (!n) 1350 + break; 1351 + tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1352 + } 1353 + 1354 + /* Ok now process the qgroups we found */ 1432 1355 ULIST_ITER_INIT(&uiter); 1433 1356 while ((unode = ulist_next(tmp, &uiter))) { 1434 - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1435 - if (qg->refcnt < seq) { 1436 - /* not visited by step 1 */ 1437 - qg->rfer += sgn * num_bytes; 1438 - qg->rfer_cmpr += sgn * num_bytes; 1439 - if (roots->nnodes == 0) { 1440 - qg->excl += sgn * num_bytes; 1441 - qg->excl_cmpr += sgn * num_bytes; 1442 - } 1443 - qgroup_dirty(fs_info, qg); 1444 - } 1445 - WARN_ON(qg->tag >= seq); 1446 - qg->tag = seq; 1357 + struct btrfs_qgroup_list *glist; 1447 1358 1359 + qg = u64_to_ptr(unode->aux); 1360 + if (qg->old_refcnt < seq) 1361 + qg->old_refcnt = seq + 1; 1362 + else 1363 + qg->old_refcnt++; 1364 + if (qg->new_refcnt < seq) 1365 + qg->new_refcnt = seq + 1; 1366 + else 1367 + qg->new_refcnt++; 1448 1368 list_for_each_entry(glist, &qg->groups, next_group) { 1369 + ret = ulist_add(qgroups, glist->group->qgroupid, 1370 + ptr_to_u64(glist->group), GFP_ATOMIC); 1371 + if (ret < 0) 1372 + return ret; 1449 1373 ret = ulist_add(tmp, glist->group->qgroupid, 1450 - (uintptr_t)glist->group, GFP_ATOMIC); 1374 + ptr_to_u64(glist->group), GFP_ATOMIC); 1451 1375 if (ret < 0) 1452 1376 return ret; 1453 1377 } 1454 1378 } 1455 - 1456 1379 return 0; 1457 1380 } 1458 1381 1459 - static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, 1460 - struct ulist *roots, struct ulist *tmp, 1461 - u64 seq, int sgn, u64 num_bytes) 1382 + /* Add refcnt for the newly added reference. 
*/ 1383 + static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info, 1384 + struct btrfs_qgroup_operation *oper, 1385 + struct btrfs_qgroup *qgroup, 1386 + struct ulist *tmp, struct ulist *qgroups, 1387 + u64 seq) 1462 1388 { 1463 1389 struct ulist_node *unode; 1464 1390 struct ulist_iterator uiter; 1465 1391 struct btrfs_qgroup *qg; 1466 - struct ulist_node *tmp_unode; 1467 - struct ulist_iterator tmp_uiter; 1468 1392 int ret; 1393 + 1394 + ulist_reinit(tmp); 1395 + ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup), 1396 + GFP_ATOMIC); 1397 + if (ret < 0) 1398 + return ret; 1399 + ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup), 1400 + GFP_ATOMIC); 1401 + if (ret < 0) 1402 + return ret; 1403 + ULIST_ITER_INIT(&uiter); 1404 + while ((unode = ulist_next(tmp, &uiter))) { 1405 + struct btrfs_qgroup_list *glist; 1406 + 1407 + qg = u64_to_ptr(unode->aux); 1408 + if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { 1409 + if (qg->new_refcnt < seq) 1410 + qg->new_refcnt = seq + 1; 1411 + else 1412 + qg->new_refcnt++; 1413 + } else { 1414 + if (qg->old_refcnt < seq) 1415 + qg->old_refcnt = seq + 1; 1416 + else 1417 + qg->old_refcnt++; 1418 + } 1419 + list_for_each_entry(glist, &qg->groups, next_group) { 1420 + ret = ulist_add(tmp, glist->group->qgroupid, 1421 + ptr_to_u64(glist->group), GFP_ATOMIC); 1422 + if (ret < 0) 1423 + return ret; 1424 + ret = ulist_add(qgroups, glist->group->qgroupid, 1425 + ptr_to_u64(glist->group), GFP_ATOMIC); 1426 + if (ret < 0) 1427 + return ret; 1428 + } 1429 + } 1430 + return 0; 1431 + } 1432 + 1433 + /* 1434 + * This adjusts the counters for all referenced qgroups if need be. 
1435 + */ 1436 + static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, 1437 + u64 root_to_skip, u64 num_bytes, 1438 + struct ulist *qgroups, u64 seq, 1439 + int old_roots, int new_roots, int rescan) 1440 + { 1441 + struct ulist_node *unode; 1442 + struct ulist_iterator uiter; 1443 + struct btrfs_qgroup *qg; 1444 + u64 cur_new_count, cur_old_count; 1445 + 1446 + ULIST_ITER_INIT(&uiter); 1447 + while ((unode = ulist_next(qgroups, &uiter))) { 1448 + bool dirty = false; 1449 + 1450 + qg = u64_to_ptr(unode->aux); 1451 + /* 1452 + * Wasn't referenced before but is now, add to the reference 1453 + * counters. 1454 + */ 1455 + if (qg->old_refcnt <= seq && qg->new_refcnt > seq) { 1456 + qg->rfer += num_bytes; 1457 + qg->rfer_cmpr += num_bytes; 1458 + dirty = true; 1459 + } 1460 + 1461 + /* 1462 + * Was referenced before but isn't now, subtract from the 1463 + * reference counters. 1464 + */ 1465 + if (qg->old_refcnt > seq && qg->new_refcnt <= seq) { 1466 + qg->rfer -= num_bytes; 1467 + qg->rfer_cmpr -= num_bytes; 1468 + dirty = true; 1469 + } 1470 + 1471 + if (qg->old_refcnt < seq) 1472 + cur_old_count = 0; 1473 + else 1474 + cur_old_count = qg->old_refcnt - seq; 1475 + if (qg->new_refcnt < seq) 1476 + cur_new_count = 0; 1477 + else 1478 + cur_new_count = qg->new_refcnt - seq; 1479 + 1480 + /* 1481 + * If our refcount was the same as the roots previously but our 1482 + * new count isn't the same as the number of roots now then we 1483 + * went from having a exclusive reference on this range to not. 1484 + */ 1485 + if (old_roots && cur_old_count == old_roots && 1486 + (cur_new_count != new_roots || new_roots == 0)) { 1487 + WARN_ON(cur_new_count != new_roots && new_roots == 0); 1488 + qg->excl -= num_bytes; 1489 + qg->excl_cmpr -= num_bytes; 1490 + dirty = true; 1491 + } 1492 + 1493 + /* 1494 + * If we didn't reference all the roots before but now we do we 1495 + * have an exclusive reference to this range. 
1496 + */ 1497 + if ((!old_roots || (old_roots && cur_old_count != old_roots)) 1498 + && cur_new_count == new_roots) { 1499 + qg->excl += num_bytes; 1500 + qg->excl_cmpr += num_bytes; 1501 + dirty = true; 1502 + } 1503 + 1504 + if (dirty) 1505 + qgroup_dirty(fs_info, qg); 1506 + } 1507 + return 0; 1508 + } 1509 + 1510 + /* 1511 + * If we removed a data extent and there were other references for that bytenr 1512 + * then we need to lookup all referenced roots to make sure we still don't 1513 + * reference this bytenr. If we do then we can just discard this operation. 1514 + */ 1515 + static int check_existing_refs(struct btrfs_trans_handle *trans, 1516 + struct btrfs_fs_info *fs_info, 1517 + struct btrfs_qgroup_operation *oper) 1518 + { 1519 + struct ulist *roots = NULL; 1520 + struct ulist_node *unode; 1521 + struct ulist_iterator uiter; 1522 + int ret = 0; 1523 + 1524 + ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1525 + oper->elem.seq, &roots); 1526 + if (ret < 0) 1527 + return ret; 1528 + ret = 0; 1469 1529 1470 1530 ULIST_ITER_INIT(&uiter); 1471 1531 while ((unode = ulist_next(roots, &uiter))) { 1472 - qg = find_qgroup_rb(fs_info, unode->val); 1473 - if (!qg) 1474 - continue; 1475 - 1476 - ulist_reinit(tmp); 1477 - ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); 1478 - if (ret < 0) 1479 - return ret; 1480 - 1481 - ULIST_ITER_INIT(&tmp_uiter); 1482 - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1483 - struct btrfs_qgroup_list *glist; 1484 - 1485 - qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; 1486 - if (qg->tag == seq) 1487 - continue; 1488 - 1489 - if (qg->refcnt - seq == roots->nnodes) { 1490 - qg->excl -= sgn * num_bytes; 1491 - qg->excl_cmpr -= sgn * num_bytes; 1492 - qgroup_dirty(fs_info, qg); 1493 - } 1494 - 1495 - list_for_each_entry(glist, &qg->groups, next_group) { 1496 - ret = ulist_add(tmp, glist->group->qgroupid, 1497 - (uintptr_t)glist->group, 1498 - GFP_ATOMIC); 1499 - if (ret < 0) 1500 - return ret; 1501 
- } 1532 + if (unode->val == oper->ref_root) { 1533 + ret = 1; 1534 + break; 1502 1535 } 1503 1536 } 1537 + ulist_free(roots); 1538 + btrfs_put_tree_mod_seq(fs_info, &oper->elem); 1504 1539 1505 - return 0; 1540 + return ret; 1541 + } 1542 + 1543 + /* 1544 + * If we share a reference across multiple roots then we may need to adjust 1545 + * various qgroups referenced and exclusive counters. The basic premise is this 1546 + * 1547 + * 1) We have seq to represent a 0 count. Instead of looping through all of the 1548 + * qgroups and resetting their refcount to 0 we just constantly bump this 1549 + * sequence number to act as the base reference count. This means that if 1550 + * anybody is equal to or below this sequence they were never referenced. We 1551 + * jack this sequence up by the number of roots we found each time in order to 1552 + * make sure we don't have any overlap. 1553 + * 1554 + * 2) We first search all the roots that reference the area _except_ the root 1555 + * we're acting on currently. This makes up the old_refcnt of all the qgroups 1556 + * before. 1557 + * 1558 + * 3) We walk all of the qgroups referenced by the root we are currently acting 1559 + * on, and will either adjust old_refcnt in the case of a removal or the 1560 + * new_refcnt in the case of an addition. 1561 + * 1562 + * 4) Finally we walk all the qgroups that are referenced by this range 1563 + * including the root we are acting on currently. We will adjust the counters 1564 + * based on the number of roots we had and will have after this operation. 1565 + * 1566 + * Take this example as an illustration 1567 + * 1568 + * [qgroup 1/0] 1569 + * / | \ 1570 + * [qg 0/0] [qg 0/1] [qg 0/2] 1571 + * \ | / 1572 + * [ extent ] 1573 + * 1574 + * Say we are adding a reference that is covered by qg 0/0. The first step 1575 + * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with 1576 + * old_roots being 2. Because it is adding new_roots will be 1. 
We then go 1577 + * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's 1578 + * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we 1579 + * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a 1580 + * reference and thus must add the size to the referenced bytes. Everything 1581 + * else is the same so nothing else changes. 1582 + */ 1583 + static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, 1584 + struct btrfs_fs_info *fs_info, 1585 + struct btrfs_qgroup_operation *oper) 1586 + { 1587 + struct ulist *roots = NULL; 1588 + struct ulist *qgroups, *tmp; 1589 + struct btrfs_qgroup *qgroup; 1590 + struct seq_list elem = {}; 1591 + u64 seq; 1592 + int old_roots = 0; 1593 + int new_roots = 0; 1594 + int ret = 0; 1595 + 1596 + if (oper->elem.seq) { 1597 + ret = check_existing_refs(trans, fs_info, oper); 1598 + if (ret < 0) 1599 + return ret; 1600 + if (ret) 1601 + return 0; 1602 + } 1603 + 1604 + qgroups = ulist_alloc(GFP_NOFS); 1605 + if (!qgroups) 1606 + return -ENOMEM; 1607 + 1608 + tmp = ulist_alloc(GFP_NOFS); 1609 + if (!tmp) 1610 + return -ENOMEM; 1611 + 1612 + btrfs_get_tree_mod_seq(fs_info, &elem); 1613 + ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, 1614 + &roots); 1615 + btrfs_put_tree_mod_seq(fs_info, &elem); 1616 + if (ret < 0) { 1617 + ulist_free(qgroups); 1618 + ulist_free(tmp); 1619 + return ret; 1620 + } 1621 + spin_lock(&fs_info->qgroup_lock); 1622 + qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1623 + if (!qgroup) 1624 + goto out; 1625 + seq = fs_info->qgroup_seq; 1626 + 1627 + /* 1628 + * So roots is the list of all the roots currently pointing at the 1629 + * bytenr, including the ref we are adding if we are adding, or not if 1630 + * we are removing a ref. So we pass in the ref_root to skip that root 1631 + * in our calculations. 
We set old_refnct and new_refcnt cause who the 1632 + * hell knows what everything looked like before, and it doesn't matter 1633 + * except... 1634 + */ 1635 + ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups, 1636 + seq, &old_roots, 0); 1637 + if (ret < 0) 1638 + goto out; 1639 + 1640 + /* 1641 + * Now adjust the refcounts of the qgroups that care about this 1642 + * reference, either the old_count in the case of removal or new_count 1643 + * in the case of an addition. 1644 + */ 1645 + ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups, 1646 + seq); 1647 + if (ret < 0) 1648 + goto out; 1649 + 1650 + /* 1651 + * ...in the case of removals. If we had a removal before we got around 1652 + * to processing this operation then we need to find that guy and count 1653 + * his references as if they really existed so we don't end up screwing 1654 + * up the exclusive counts. Then whenever we go to process the delete 1655 + * everything will be grand and we can account for whatever exclusive 1656 + * changes need to be made there. We also have to pass in old_roots so 1657 + * we have an accurate count of the roots as it pertains to this 1658 + * operations view of the world. 1659 + */ 1660 + ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, 1661 + &old_roots); 1662 + if (ret < 0) 1663 + goto out; 1664 + 1665 + /* 1666 + * We are adding our root, need to adjust up the number of roots, 1667 + * otherwise old_roots is the number of roots we want. 1668 + */ 1669 + if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { 1670 + new_roots = old_roots + 1; 1671 + } else { 1672 + new_roots = old_roots; 1673 + old_roots++; 1674 + } 1675 + fs_info->qgroup_seq += old_roots + 1; 1676 + 1677 + 1678 + /* 1679 + * And now the magic happens, bless Arne for having a pretty elegant 1680 + * solution for this. 
1681 + */ 1682 + qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, 1683 + qgroups, seq, old_roots, new_roots, 0); 1684 + out: 1685 + spin_unlock(&fs_info->qgroup_lock); 1686 + ulist_free(qgroups); 1687 + ulist_free(roots); 1688 + ulist_free(tmp); 1689 + return ret; 1506 1690 } 1507 1691 1508 1692 /* ··· 1864 1342 * then the space is accounted accordingly to the different roots. The 1865 1343 * accounting algorithm works in 3 steps documented inline. 1866 1344 */ 1867 - int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, 1868 - struct btrfs_fs_info *fs_info, 1869 - struct btrfs_delayed_ref_node *node, 1870 - struct btrfs_delayed_extent_op *extent_op) 1345 + static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, 1346 + struct btrfs_fs_info *fs_info, 1347 + struct btrfs_qgroup_operation *oper) 1871 1348 { 1872 - struct btrfs_root *quota_root; 1873 - u64 ref_root; 1874 - struct btrfs_qgroup *qgroup; 1875 - struct ulist *roots = NULL; 1876 - u64 seq; 1877 1349 int ret = 0; 1878 - int sgn; 1879 1350 1880 1351 if (!fs_info->quota_enabled) 1881 1352 return 0; 1882 1353 1883 1354 BUG_ON(!fs_info->quota_root); 1884 1355 1885 - if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1886 - node->type == BTRFS_SHARED_BLOCK_REF_KEY) { 1887 - struct btrfs_delayed_tree_ref *ref; 1888 - ref = btrfs_delayed_node_to_tree_ref(node); 1889 - ref_root = ref->root; 1890 - } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1891 - node->type == BTRFS_SHARED_DATA_REF_KEY) { 1892 - struct btrfs_delayed_data_ref *ref; 1893 - ref = btrfs_delayed_node_to_data_ref(node); 1894 - ref_root = ref->root; 1895 - } else { 1896 - BUG(); 1897 - } 1898 - 1899 - if (!is_fstree(ref_root)) { 1900 - /* 1901 - * non-fs-trees are not being accounted 1902 - */ 1903 - return 0; 1904 - } 1905 - 1906 - switch (node->action) { 1907 - case BTRFS_ADD_DELAYED_REF: 1908 - case BTRFS_ADD_DELAYED_EXTENT: 1909 - sgn = 1; 1910 - seq = btrfs_tree_mod_seq_prev(node->seq); 1911 - break; 1912 - 
case BTRFS_DROP_DELAYED_REF: 1913 - sgn = -1; 1914 - seq = node->seq; 1915 - break; 1916 - case BTRFS_UPDATE_DELAYED_HEAD: 1917 - return 0; 1918 - default: 1919 - BUG(); 1920 - } 1921 - 1922 1356 mutex_lock(&fs_info->qgroup_rescan_lock); 1923 1357 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1924 - if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { 1358 + if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { 1925 1359 mutex_unlock(&fs_info->qgroup_rescan_lock); 1926 1360 return 0; 1927 1361 } 1928 1362 } 1929 1363 mutex_unlock(&fs_info->qgroup_rescan_lock); 1930 1364 1931 - /* 1932 - * the delayed ref sequence number we pass depends on the direction of 1933 - * the operation. for add operations, we pass 1934 - * tree_mod_log_prev_seq(node->seq) to skip 1935 - * the delayed ref's current sequence number, because we need the state 1936 - * of the tree before the add operation. for delete operations, we pass 1937 - * (node->seq) to include the delayed ref's current sequence number, 1938 - * because we need the state of the tree after the delete operation. 1939 - */ 1940 - ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots); 1941 - if (ret < 0) 1942 - return ret; 1365 + ASSERT(is_fstree(oper->ref_root)); 1943 1366 1944 - spin_lock(&fs_info->qgroup_lock); 1367 + switch (oper->type) { 1368 + case BTRFS_QGROUP_OPER_ADD_EXCL: 1369 + case BTRFS_QGROUP_OPER_SUB_EXCL: 1370 + ret = qgroup_excl_accounting(fs_info, oper); 1371 + break; 1372 + case BTRFS_QGROUP_OPER_ADD_SHARED: 1373 + case BTRFS_QGROUP_OPER_SUB_SHARED: 1374 + ret = qgroup_shared_accounting(trans, fs_info, oper); 1375 + break; 1376 + default: 1377 + ASSERT(0); 1378 + } 1379 + return ret; 1380 + } 1945 1381 1946 - quota_root = fs_info->quota_root; 1947 - if (!quota_root) 1948 - goto unlock; 1382 + /* 1383 + * Needs to be called everytime we run delayed refs, even if there is an error 1384 + * in order to cleanup outstanding operations. 
1385 + */ 1386 + int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, 1387 + struct btrfs_fs_info *fs_info) 1388 + { 1389 + struct btrfs_qgroup_operation *oper; 1390 + int ret = 0; 1949 1391 1950 - qgroup = find_qgroup_rb(fs_info, ref_root); 1951 - if (!qgroup) 1952 - goto unlock; 1953 - 1954 - /* 1955 - * step 1: for each old ref, visit all nodes once and inc refcnt 1956 - */ 1957 - ulist_reinit(fs_info->qgroup_ulist); 1958 - seq = fs_info->qgroup_seq; 1959 - fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 1960 - 1961 - ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, 1962 - seq); 1963 - if (ret) 1964 - goto unlock; 1965 - 1966 - /* 1967 - * step 2: walk from the new root 1968 - */ 1969 - ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, 1970 - seq, sgn, node->num_bytes, qgroup); 1971 - if (ret) 1972 - goto unlock; 1973 - 1974 - /* 1975 - * step 3: walk again from old refs 1976 - */ 1977 - ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, 1978 - seq, sgn, node->num_bytes); 1979 - if (ret) 1980 - goto unlock; 1981 - 1982 - unlock: 1983 - spin_unlock(&fs_info->qgroup_lock); 1984 - ulist_free(roots); 1985 - 1392 + while (!list_empty(&trans->qgroup_ref_list)) { 1393 + oper = list_first_entry(&trans->qgroup_ref_list, 1394 + struct btrfs_qgroup_operation, list); 1395 + list_del_init(&oper->list); 1396 + if (!ret || !trans->aborted) 1397 + ret = btrfs_qgroup_account(trans, fs_info, oper); 1398 + spin_lock(&fs_info->qgroup_op_lock); 1399 + rb_erase(&oper->n, &fs_info->qgroup_op_tree); 1400 + spin_unlock(&fs_info->qgroup_op_lock); 1401 + btrfs_put_tree_mod_seq(fs_info, &oper->elem); 1402 + kfree(oper); 1403 + } 1986 1404 return ret; 1987 1405 } 1988 1406 ··· 2091 1629 srcgroup = find_qgroup_rb(fs_info, srcid); 2092 1630 if (!srcgroup) 2093 1631 goto unlock; 2094 - dstgroup->rfer = srcgroup->rfer - level_size; 2095 - dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; 1632 + 1633 + 
/* 1634 + * We call inherit after we clone the root in order to make sure 1635 + * our counts don't go crazy, so at this point the only 1636 + * difference between the two roots should be the root node. 1637 + */ 1638 + dstgroup->rfer = srcgroup->rfer; 1639 + dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 1640 + dstgroup->excl = level_size; 1641 + dstgroup->excl_cmpr = level_size; 2096 1642 srcgroup->excl = level_size; 2097 1643 srcgroup->excl_cmpr = level_size; 2098 1644 qgroup_dirty(fs_info, dstgroup); ··· 2204 1734 struct btrfs_qgroup *qg; 2205 1735 struct btrfs_qgroup_list *glist; 2206 1736 2207 - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1737 + qg = u64_to_ptr(unode->aux); 2208 1738 2209 1739 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2210 1740 qg->reserved + (s64)qg->rfer + num_bytes > ··· 2236 1766 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2237 1767 struct btrfs_qgroup *qg; 2238 1768 2239 - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1769 + qg = u64_to_ptr(unode->aux); 2240 1770 2241 1771 qg->reserved += num_bytes; 2242 1772 } ··· 2282 1812 struct btrfs_qgroup *qg; 2283 1813 struct btrfs_qgroup_list *glist; 2284 1814 2285 - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1815 + qg = u64_to_ptr(unode->aux); 2286 1816 2287 1817 qg->reserved -= num_bytes; 2288 1818 ··· 2318 1848 */ 2319 1849 static int 2320 1850 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2321 - struct btrfs_trans_handle *trans, struct ulist *tmp, 2322 - struct extent_buffer *scratch_leaf) 1851 + struct btrfs_trans_handle *trans, struct ulist *qgroups, 1852 + struct ulist *tmp, struct extent_buffer *scratch_leaf) 2323 1853 { 2324 1854 struct btrfs_key found; 2325 1855 struct ulist *roots = NULL; 2326 - struct ulist_node *unode; 2327 - struct ulist_iterator uiter; 2328 1856 struct seq_list tree_mod_seq_elem = {}; 1857 + u64 num_bytes; 2329 1858 u64 seq; 1859 + int new_roots; 2330 1860 int slot; 2331 1861 int ret; 2332 
1862 ··· 2367 1897 mutex_unlock(&fs_info->qgroup_rescan_lock); 2368 1898 2369 1899 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2370 - u64 num_bytes; 2371 - 2372 1900 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2373 1901 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2374 1902 found.type != BTRFS_METADATA_ITEM_KEY) ··· 2376 1908 else 2377 1909 num_bytes = found.offset; 2378 1910 2379 - ret = btrfs_find_all_roots(trans, fs_info, found.objectid, 2380 - tree_mod_seq_elem.seq, &roots); 1911 + ulist_reinit(qgroups); 1912 + ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 1913 + &roots); 2381 1914 if (ret < 0) 2382 1915 goto out; 2383 1916 spin_lock(&fs_info->qgroup_lock); 2384 1917 seq = fs_info->qgroup_seq; 2385 1918 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2386 1919 2387 - ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); 2388 - if (ret) { 1920 + new_roots = 0; 1921 + ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, 1922 + seq, &new_roots, 1); 1923 + if (ret < 0) { 2389 1924 spin_unlock(&fs_info->qgroup_lock); 2390 1925 ulist_free(roots); 2391 1926 goto out; 2392 1927 } 2393 1928 2394 - /* 2395 - * step2 of btrfs_qgroup_account_ref works from a single root, 2396 - * we're doing all at once here. 
2397 - */ 2398 - ulist_reinit(tmp); 2399 - ULIST_ITER_INIT(&uiter); 2400 - while ((unode = ulist_next(roots, &uiter))) { 2401 - struct btrfs_qgroup *qg; 2402 - 2403 - qg = find_qgroup_rb(fs_info, unode->val); 2404 - if (!qg) 2405 - continue; 2406 - 2407 - ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, 2408 - GFP_ATOMIC); 2409 - if (ret < 0) { 2410 - spin_unlock(&fs_info->qgroup_lock); 2411 - ulist_free(roots); 2412 - goto out; 2413 - } 1929 + ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, 1930 + seq, 0, new_roots, 1); 1931 + if (ret < 0) { 1932 + spin_unlock(&fs_info->qgroup_lock); 1933 + ulist_free(roots); 1934 + goto out; 2414 1935 } 2415 - 2416 - /* this loop is similar to step 2 of btrfs_qgroup_account_ref */ 2417 - ULIST_ITER_INIT(&uiter); 2418 - while ((unode = ulist_next(tmp, &uiter))) { 2419 - struct btrfs_qgroup *qg; 2420 - struct btrfs_qgroup_list *glist; 2421 - 2422 - qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; 2423 - qg->rfer += num_bytes; 2424 - qg->rfer_cmpr += num_bytes; 2425 - WARN_ON(qg->tag >= seq); 2426 - if (qg->refcnt - seq == roots->nnodes) { 2427 - qg->excl += num_bytes; 2428 - qg->excl_cmpr += num_bytes; 2429 - } 2430 - qgroup_dirty(fs_info, qg); 2431 - 2432 - list_for_each_entry(glist, &qg->groups, next_group) { 2433 - ret = ulist_add(tmp, glist->group->qgroupid, 2434 - (uintptr_t)glist->group, 2435 - GFP_ATOMIC); 2436 - if (ret < 0) { 2437 - spin_unlock(&fs_info->qgroup_lock); 2438 - ulist_free(roots); 2439 - goto out; 2440 - } 2441 - } 2442 - } 2443 - 2444 1936 spin_unlock(&fs_info->qgroup_lock); 2445 1937 ulist_free(roots); 2446 - ret = 0; 2447 1938 } 2448 - 2449 1939 out: 2450 1940 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2451 1941 ··· 2416 1990 qgroup_rescan_work); 2417 1991 struct btrfs_path *path; 2418 1992 struct btrfs_trans_handle *trans = NULL; 2419 - struct ulist *tmp = NULL; 1993 + struct ulist *tmp = NULL, *qgroups = NULL; 2420 1994 struct extent_buffer *scratch_leaf = NULL; 2421 1995 int 
err = -ENOMEM; 2422 1996 2423 1997 path = btrfs_alloc_path(); 2424 1998 if (!path) 1999 + goto out; 2000 + qgroups = ulist_alloc(GFP_NOFS); 2001 + if (!qgroups) 2425 2002 goto out; 2426 2003 tmp = ulist_alloc(GFP_NOFS); 2427 2004 if (!tmp) ··· 2444 2015 err = -EINTR; 2445 2016 } else { 2446 2017 err = qgroup_rescan_leaf(fs_info, path, trans, 2447 - tmp, scratch_leaf); 2018 + qgroups, tmp, scratch_leaf); 2448 2019 } 2449 2020 if (err > 0) 2450 2021 btrfs_commit_transaction(trans, fs_info->fs_root); ··· 2454 2025 2455 2026 out: 2456 2027 kfree(scratch_leaf); 2457 - ulist_free(tmp); 2028 + ulist_free(qgroups); 2458 2029 btrfs_free_path(path); 2459 2030 2460 2031 mutex_lock(&fs_info->qgroup_rescan_lock);
+107
fs/btrfs/qgroup.h
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#ifndef __BTRFS_QGROUP__
#define __BTRFS_QGROUP__

/*
 * A description of the operations, all of these operations only happen when we
 * are adding the 1st reference for that subvolume in the case of adding space
 * or on the last reference delete in the case of subtraction.  The only
 * exception is the last one, which is added for confusion.
 *
 * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
 * one pointing at the bytes we are adding.  This is called on the first
 * allocation.
 *
 * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
 * shared between subvols.  This is called on the creation of a ref that
 * already has refs from a different subvolume, so basically reflink.
 *
 * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
 * one referencing the range.
 *
 * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares
 * refs with other subvolumes.
 */
enum btrfs_qgroup_operation_type {
	BTRFS_QGROUP_OPER_ADD_EXCL,
	BTRFS_QGROUP_OPER_ADD_SHARED,
	BTRFS_QGROUP_OPER_SUB_EXCL,
	BTRFS_QGROUP_OPER_SUB_SHARED,
};

/*
 * One queued qgroup accounting operation.  Created when a real ref change
 * happens and processed when delayed refs are run; lives on both a per-fs
 * rb-tree (keyed for dedup/lookup) and the transaction's qgroup_ref_list.
 */
struct btrfs_qgroup_operation {
	u64 ref_root;				/* root the ref change is against */
	u64 bytenr;				/* start of the extent */
	u64 num_bytes;				/* length of the extent */
	u64 seq;				/* tree mod seq when queued */
	enum btrfs_qgroup_operation_type type;
	struct seq_list elem;			/* tree mod seq elem held until processed */
	struct rb_node n;			/* link into fs_info->qgroup_op_tree */
	struct list_head list;			/* link into trans->qgroup_ref_list */
};

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid,
			char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info, u64 ref_root,
			    u64 bytenr, u64 num_bytes,
			    enum btrfs_qgroup_operation_type type,
			    int mod_seq);
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
				    struct btrfs_fs_info *fs_info);
void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_operation *oper);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);

void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif

#endif /* __BTRFS_QGROUP__ */
+26 -26
fs/btrfs/transaction.c
··· 31 31 #include "inode-map.h" 32 32 #include "volumes.h" 33 33 #include "dev-replace.h" 34 + #include "qgroup.h" 34 35 35 36 #define BTRFS_ROOT_TRANS_TAG 0 36 37 ··· 704 703 return 0; 705 704 } 706 705 707 - /* 708 - * do the qgroup accounting as early as possible 709 - */ 710 - err = btrfs_delayed_refs_qgroup_accounting(trans, info); 711 - 712 706 btrfs_trans_release_metadata(trans, root); 713 707 trans->block_rsv = NULL; 714 - 715 - if (trans->qgroup_reserved) { 716 - /* 717 - * the same root has to be passed here between start_transaction 718 - * and end_transaction. Subvolume quota depends on this. 719 - */ 720 - btrfs_qgroup_free(trans->root, trans->qgroup_reserved); 721 - trans->qgroup_reserved = 0; 722 - } 723 708 724 709 if (!list_empty(&trans->new_bgs)) 725 710 btrfs_create_pending_block_groups(trans, root); ··· 715 728 cur = max_t(unsigned long, cur, 32); 716 729 trans->delayed_ref_updates = 0; 717 730 btrfs_run_delayed_refs(trans, root, cur); 731 + } 732 + 733 + if (trans->qgroup_reserved) { 734 + /* 735 + * the same root has to be passed here between start_transaction 736 + * and end_transaction. Subvolume quota depends on this. 737 + */ 738 + btrfs_qgroup_free(trans->root, trans->qgroup_reserved); 739 + trans->qgroup_reserved = 0; 718 740 } 719 741 720 742 btrfs_trans_release_metadata(trans, root); ··· 1165 1169 goto no_free_objectid; 1166 1170 } 1167 1171 1168 - pending->error = btrfs_qgroup_inherit(trans, fs_info, 1169 - root->root_key.objectid, 1170 - objectid, pending->inherit); 1171 - if (pending->error) 1172 - goto no_free_objectid; 1173 - 1174 1172 key.objectid = objectid; 1175 1173 key.offset = (u64)-1; 1176 1174 key.type = BTRFS_ROOT_ITEM_KEY; ··· 1260 1270 btrfs_abort_transaction(trans, root, ret); 1261 1271 goto fail; 1262 1272 } 1273 + 1274 + /* 1275 + * We need to flush delayed refs in order to make sure all of our quota 1276 + * operations have been done before we call btrfs_qgroup_inherit. 
1277 + */ 1278 + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1279 + if (ret) { 1280 + btrfs_abort_transaction(trans, root, ret); 1281 + goto fail; 1282 + } 1283 + 1284 + pending->error = btrfs_qgroup_inherit(trans, fs_info, 1285 + root->root_key.objectid, 1286 + objectid, pending->inherit); 1287 + if (pending->error) 1288 + goto no_free_objectid; 1263 1289 1264 1290 /* see comments in should_cow_block() */ 1265 1291 set_bit(BTRFS_ROOT_FORCE_COW, &root->state); ··· 1605 1599 * them now so that they hinder processing of more delayed refs 1606 1600 * as little as possible. 1607 1601 */ 1608 - if (ret) { 1609 - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 1610 - return ret; 1611 - } 1612 - 1613 - ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 1614 1602 if (ret) 1615 1603 return ret; 1616 1604