Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: qgroup: use xarray to track dirty extents in transaction

Use xarray to track dirty extents to reduce the size of the struct
btrfs_qgroup_extent_record from 64 bytes to 40 bytes. The xarray is
more cache line friendly, it also reduces the complexity of insertion
and search code compared to rb tree.

Another change introduced is about error handling. Before this patch,
the result of btrfs_qgroup_trace_extent_nolock() is always a success. In
this patch, because this function calls xa_store(), which has the
possibility to fail, mark qgroup as inconsistent if an error happens
and then free the preallocated memory. Also, we preallocate memory
before spin_lock(); if memory preallocation fails, error handling is the
same as in the existing code.

Suggested-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Junchao Sun <sunjunchao2870@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by

Junchao Sun and committed by
David Sterba
3cce39a8 14ed830d

+50 -43
+14 -3
fs/btrfs/delayed-ref.c
··· 855 855 856 856 /* Record qgroup extent info if provided */ 857 857 if (qrecord) { 858 - if (btrfs_qgroup_trace_extent_nolock(trans->fs_info, 859 - delayed_refs, qrecord)) 858 + int ret; 859 + 860 + ret = btrfs_qgroup_trace_extent_nolock(trans->fs_info, 861 + delayed_refs, qrecord); 862 + if (ret) { 863 + /* Clean up if insertion fails or item exists. */ 864 + xa_release(&delayed_refs->dirty_extents, qrecord->bytenr); 860 865 kfree(qrecord); 861 - else 866 + } else { 862 867 qrecord_inserted = true; 868 + } 863 869 } 864 870 865 871 trace_add_delayed_ref_head(trans->fs_info, head_ref, action); ··· 1018 1012 record = kzalloc(sizeof(*record), GFP_NOFS); 1019 1013 if (!record) 1020 1014 goto free_head_ref; 1015 + if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, 1016 + generic_ref->bytenr, GFP_NOFS)) 1017 + goto free_record; 1021 1018 } 1022 1019 1023 1020 init_delayed_ref_common(fs_info, node, generic_ref); ··· 1057 1048 return btrfs_qgroup_trace_extent_post(trans, record); 1058 1049 return 0; 1059 1050 1051 + free_record: 1052 + kfree(record); 1060 1053 free_head_ref: 1061 1054 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 1062 1055 free_node:
+2 -2
fs/btrfs/delayed-ref.h
··· 202 202 /* head ref rbtree */ 203 203 struct rb_root_cached href_root; 204 204 205 - /* dirty extent records */ 206 - struct rb_root dirty_extent_root; 205 + /* Track dirty extent records. */ 206 + struct xarray dirty_extents; 207 207 208 208 /* this spin lock protects the rbtree and the entries inside */ 209 209 spinlock_t lock;
+32 -34
fs/btrfs/qgroup.c
··· 1998 1998 * 1999 1999 * Return 0 for success insert 2000 2000 * Return >0 for existing record, caller can free @record safely. 2001 - * Error is not possible 2001 + * Return <0 for insertion failure, caller can free @record safely. 2002 2002 */ 2003 2003 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 2004 2004 struct btrfs_delayed_ref_root *delayed_refs, 2005 2005 struct btrfs_qgroup_extent_record *record) 2006 2006 { 2007 - struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 2008 - struct rb_node *parent_node = NULL; 2009 - struct btrfs_qgroup_extent_record *entry; 2010 - u64 bytenr = record->bytenr; 2007 + struct btrfs_qgroup_extent_record *existing, *ret; 2008 + unsigned long bytenr = record->bytenr; 2011 2009 2012 2010 if (!btrfs_qgroup_full_accounting(fs_info)) 2013 2011 return 1; ··· 2013 2015 lockdep_assert_held(&delayed_refs->lock); 2014 2016 trace_btrfs_qgroup_trace_extent(fs_info, record); 2015 2017 2016 - while (*p) { 2017 - parent_node = *p; 2018 - entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 2019 - node); 2020 - if (bytenr < entry->bytenr) { 2021 - p = &(*p)->rb_left; 2022 - } else if (bytenr > entry->bytenr) { 2023 - p = &(*p)->rb_right; 2024 - } else { 2025 - if (record->data_rsv && !entry->data_rsv) { 2026 - entry->data_rsv = record->data_rsv; 2027 - entry->data_rsv_refroot = 2028 - record->data_rsv_refroot; 2029 - } 2030 - return 1; 2018 + xa_lock(&delayed_refs->dirty_extents); 2019 + existing = xa_load(&delayed_refs->dirty_extents, bytenr); 2020 + if (existing) { 2021 + if (record->data_rsv && !existing->data_rsv) { 2022 + existing->data_rsv = record->data_rsv; 2023 + existing->data_rsv_refroot = record->data_rsv_refroot; 2031 2024 } 2025 + xa_unlock(&delayed_refs->dirty_extents); 2026 + return 1; 2032 2027 } 2033 2028 2034 - rb_link_node(&record->node, parent_node, p); 2035 - rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 2029 + ret = 
__xa_store(&delayed_refs->dirty_extents, record->bytenr, record, GFP_ATOMIC); 2030 + xa_unlock(&delayed_refs->dirty_extents); 2031 + if (xa_is_err(ret)) { 2032 + qgroup_mark_inconsistent(fs_info); 2033 + return xa_err(ret); 2034 + } 2035 + 2036 2036 return 0; 2037 2037 } 2038 2038 ··· 2137 2141 if (!record) 2138 2142 return -ENOMEM; 2139 2143 2144 + if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, bytenr, GFP_NOFS)) { 2145 + kfree(record); 2146 + return -ENOMEM; 2147 + } 2148 + 2140 2149 delayed_refs = &trans->transaction->delayed_refs; 2141 2150 record->bytenr = bytenr; 2142 2151 record->num_bytes = num_bytes; ··· 2150 2149 spin_lock(&delayed_refs->lock); 2151 2150 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 2152 2151 spin_unlock(&delayed_refs->lock); 2153 - if (ret > 0) { 2152 + if (ret) { 2153 + /* Clean up if insertion fails or item exists. */ 2154 + xa_release(&delayed_refs->dirty_extents, record->bytenr); 2154 2155 kfree(record); 2155 2156 return 0; 2156 2157 } ··· 3021 3018 struct btrfs_qgroup_extent_record *record; 3022 3019 struct btrfs_delayed_ref_root *delayed_refs; 3023 3020 struct ulist *new_roots = NULL; 3024 - struct rb_node *node; 3021 + unsigned long index; 3025 3022 u64 num_dirty_extents = 0; 3026 3023 u64 qgroup_to_skip; 3027 3024 int ret = 0; ··· 3031 3028 3032 3029 delayed_refs = &trans->transaction->delayed_refs; 3033 3030 qgroup_to_skip = delayed_refs->qgroup_to_skip; 3034 - while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 3035 - record = rb_entry(node, struct btrfs_qgroup_extent_record, 3036 - node); 3037 - 3031 + xa_for_each(&delayed_refs->dirty_extents, index, record) { 3038 3032 num_dirty_extents++; 3039 3033 trace_btrfs_qgroup_account_extents(fs_info, record); 3040 3034 ··· 3097 3097 ulist_free(record->old_roots); 3098 3098 ulist_free(new_roots); 3099 3099 new_roots = NULL; 3100 - rb_erase(node, &delayed_refs->dirty_extent_root); 3100 + xa_erase(&delayed_refs->dirty_extents, index); 
3101 3101 kfree(record); 3102 3102 3103 3103 } ··· 4874 4874 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) 4875 4875 { 4876 4876 struct btrfs_qgroup_extent_record *entry; 4877 - struct btrfs_qgroup_extent_record *next; 4878 - struct rb_root *root; 4877 + unsigned long index; 4879 4878 4880 - root = &trans->delayed_refs.dirty_extent_root; 4881 - rbtree_postorder_for_each_entry_safe(entry, next, root, node) { 4879 + xa_for_each(&trans->delayed_refs.dirty_extents, index, entry) { 4882 4880 ulist_free(entry->old_roots); 4883 4881 kfree(entry); 4884 4882 } 4885 - *root = RB_ROOT; 4883 + xa_destroy(&trans->delayed_refs.dirty_extents); 4886 4884 } 4887 4885 4888 4886 void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes)
-1
fs/btrfs/qgroup.h
··· 125 125 * Record a dirty extent, and info qgroup to update quota on it 126 126 */ 127 127 struct btrfs_qgroup_extent_record { 128 - struct rb_node node; 129 128 u64 bytenr; 130 129 u64 num_bytes; 131 130
+2 -3
fs/btrfs/transaction.c
··· 143 143 BUG_ON(!list_empty(&transaction->list)); 144 144 WARN_ON(!RB_EMPTY_ROOT( 145 145 &transaction->delayed_refs.href_root.rb_root)); 146 - WARN_ON(!RB_EMPTY_ROOT( 147 - &transaction->delayed_refs.dirty_extent_root)); 146 + WARN_ON(!xa_empty(&transaction->delayed_refs.dirty_extents)); 148 147 if (transaction->delayed_refs.pending_csums) 149 148 btrfs_err(transaction->fs_info, 150 149 "pending csums is %llu", ··· 350 351 memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); 351 352 352 353 cur_trans->delayed_refs.href_root = RB_ROOT_CACHED; 353 - cur_trans->delayed_refs.dirty_extent_root = RB_ROOT; 354 + xa_init(&cur_trans->delayed_refs.dirty_extents); 354 355 atomic_set(&cur_trans->delayed_refs.num_entries, 0); 355 356 356 357 /*