Merge tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"This contains a few fixups to the qgroup patches that were merged this
dev cycle, unaligned access fix, blockgroup removal corner case fix
and a small debugging output tweak"

* tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: print-tree: debugging output enhancement
btrfs: Fix race condition between delayed refs and blockgroup removal
btrfs: fix unaligned access in readdir
btrfs: Fix wrong btrfs_delalloc_release_extents parameter
btrfs: delayed-inode: Remove wrong qgroup meta reservation calls
btrfs: qgroup: Use independent and accurate per inode qgroup rsv
btrfs: qgroup: Commit transaction in advance to reduce early EDQUOT

fs/btrfs/ctree.h (+25)

···
         unsigned short full;
         unsigned short type;
         unsigned short failfast;
+
+        /*
+         * Qgroup equivalent for @size @reserved
+         *
+         * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
+         * about things like csum size nor how many tree blocks it will need to
+         * reserve.
+         *
+         * Qgroup cares more about net change of the extent usage.
+         *
+         * So for one newly inserted file extent, in worst case it will cause
+         * leaf split and level increase, nodesize for each file extent is
+         * already too much.
+         *
+         * In short, qgroup_size/reserved is the upper limit of possible needed
+         * qgroup metadata reservation.
+         */
+        u64 qgroup_rsv_size;
+        u64 qgroup_rsv_reserved;
 };

 /*
···
  * (device replace, resize, device add/delete, balance)
  */
 #define BTRFS_FS_EXCL_OP                16
+
+/*
+ * To info transaction_kthread we need an immediate commit so it doesn't
+ * need to wait for commit_interval
+ */
+#define BTRFS_FS_NEED_ASYNC_COMMIT      17

 struct btrfs_fs_info {
         u8 fsid[BTRFS_FSID_SIZE];

fs/btrfs/delayed-inode.c (+16 -4)

···
         dst_rsv = &fs_info->delayed_block_rsv;

         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+
+        /*
+         * Here we migrate space rsv from transaction rsv, since have already
+         * reserved space when starting a transaction. So no need to reserve
+         * qgroup space here.
+         */
         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
         if (!ret) {
                 trace_btrfs_space_reservation(fs_info, "delayed_item",
···
                 return;

         rsv = &fs_info->delayed_block_rsv;
-        btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
+        /*
+         * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
+         * to release/reserve qgroup space.
+         */
         trace_btrfs_space_reservation(fs_info, "delayed_item",
                                       item->key.objectid, item->bytes_reserved,
                                       0);
···

         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

-        ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
-        if (ret < 0)
-                return ret;
         /*
          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
          * which doesn't reserve space for speed. This is a problem since we
···
          */
         if (!src_rsv || (!trans->bytes_reserved &&
                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
+                ret = btrfs_qgroup_reserve_meta_prealloc(root,
+                                fs_info->nodesize, true);
+                if (ret < 0)
+                        return ret;
                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
                                           BTRFS_RESERVE_NO_FLUSH);
                 /*
···
                                               "delayed_inode",
                                               btrfs_ino(inode),
                                               num_bytes, 1);
+        } else {
+                btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
         }
         return ret;
 }

fs/btrfs/delayed-ref.c (+14 -5)

···
                      struct btrfs_delayed_ref_head *head_ref,
                      struct btrfs_qgroup_extent_record *qrecord,
                      u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-                     int action, int is_data, int *qrecord_inserted_ret,
+                     int action, int is_data, int is_system,
+                     int *qrecord_inserted_ret,
                      int *old_ref_mod, int *new_ref_mod)
+
 {
         struct btrfs_delayed_ref_head *existing;
         struct btrfs_delayed_ref_root *delayed_refs;
···
         head_ref->ref_mod = count_mod;
         head_ref->must_insert_reserved = must_insert_reserved;
         head_ref->is_data = is_data;
+        head_ref->is_system = is_system;
         head_ref->ref_tree = RB_ROOT;
         INIT_LIST_HEAD(&head_ref->ref_add_list);
         RB_CLEAR_NODE(&head_ref->href_node);
···
         struct btrfs_delayed_ref_root *delayed_refs;
         struct btrfs_qgroup_extent_record *record = NULL;
         int qrecord_inserted;
+        int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);

         BUG_ON(extent_op && extent_op->is_data);
         ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
···
          */
         head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
                                         bytenr, num_bytes, 0, 0, action, 0,
-                                        &qrecord_inserted, old_ref_mod,
-                                        new_ref_mod);
+                                        is_system, &qrecord_inserted,
+                                        old_ref_mod, new_ref_mod);

         add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                              num_bytes, parent, ref_root, level, action);
···
          */
         head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
                                         bytenr, num_bytes, ref_root, reserved,
-                                        action, 1, &qrecord_inserted,
+                                        action, 1, 0, &qrecord_inserted,
                                         old_ref_mod, new_ref_mod);

         add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
···
         delayed_refs = &trans->transaction->delayed_refs;
         spin_lock(&delayed_refs->lock);

+        /*
+         * extent_ops just modify the flags of an extent and they don't result
+         * in ref count changes, hence it's safe to pass false/0 for is_system
+         * argument
+         */
         add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
                              num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-                             extent_op->is_data, NULL, NULL, NULL);
+                             extent_op->is_data, 0, NULL, NULL, NULL);

         spin_unlock(&delayed_refs->lock);
         return 0;

fs/btrfs/delayed-ref.h (+1)

···
          */
         unsigned int must_insert_reserved:1;
         unsigned int is_data:1;
+        unsigned int is_system:1;
         unsigned int processing:1;
 };

fs/btrfs/disk-io.c (+1)

···
                 now = get_seconds();
                 if (cur->state < TRANS_STATE_BLOCKED &&
+                    !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
                     (now < cur->start_time ||
                      now - cur->start_time < fs_info->commit_interval)) {
                         spin_unlock(&fs_info->trans_lock);

fs/btrfs/extent-tree.c (+57 -16)

···
         trace_run_delayed_ref_head(fs_info, head, 0);

         if (head->total_ref_mod < 0) {
-                struct btrfs_block_group_cache *cache;
+                struct btrfs_space_info *space_info;
+                u64 flags;

-                cache = btrfs_lookup_block_group(fs_info, head->bytenr);
-                ASSERT(cache);
-                percpu_counter_add(&cache->space_info->total_bytes_pinned,
+                if (head->is_data)
+                        flags = BTRFS_BLOCK_GROUP_DATA;
+                else if (head->is_system)
+                        flags = BTRFS_BLOCK_GROUP_SYSTEM;
+                else
+                        flags = BTRFS_BLOCK_GROUP_METADATA;
+                space_info = __find_space_info(fs_info, flags);
+                ASSERT(space_info);
+                percpu_counter_add(&space_info->total_bytes_pinned,
                                    -head->num_bytes);
-                btrfs_put_block_group(cache);

                 if (head->is_data) {
                         spin_lock(&delayed_refs->lock);
···

 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
-                                   struct btrfs_block_rsv *dest, u64 num_bytes)
+                                   struct btrfs_block_rsv *dest, u64 num_bytes,
+                                   u64 *qgroup_to_release_ret)
 {
         struct btrfs_space_info *space_info = block_rsv->space_info;
+        u64 qgroup_to_release = 0;
         u64 ret;

         spin_lock(&block_rsv->lock);
-        if (num_bytes == (u64)-1)
+        if (num_bytes == (u64)-1) {
                 num_bytes = block_rsv->size;
+                qgroup_to_release = block_rsv->qgroup_rsv_size;
+        }
         block_rsv->size -= num_bytes;
         if (block_rsv->reserved >= block_rsv->size) {
                 num_bytes = block_rsv->reserved - block_rsv->size;
···
                 block_rsv->full = 1;
         } else {
                 num_bytes = 0;
+        }
+        if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+                qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+                                    block_rsv->qgroup_rsv_size;
+                block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+        } else {
+                qgroup_to_release = 0;
         }
         spin_unlock(&block_rsv->lock);
···
                 space_info_add_old_bytes(fs_info, space_info,
                                          num_bytes);
         }
+        if (qgroup_to_release_ret)
+                *qgroup_to_release_ret = qgroup_to_release;
         return ret;
 }
···
         struct btrfs_root *root = inode->root;
         struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
         u64 num_bytes = 0;
+        u64 qgroup_num_bytes = 0;
         int ret = -ENOSPC;

         spin_lock(&block_rsv->lock);
         if (block_rsv->reserved < block_rsv->size)
                 num_bytes = block_rsv->size - block_rsv->reserved;
+        if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+                qgroup_num_bytes = block_rsv->qgroup_rsv_size -
+                                   block_rsv->qgroup_rsv_reserved;
         spin_unlock(&block_rsv->lock);

         if (num_bytes == 0)
                 return 0;

-        ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+        ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
         if (ret)
                 return ret;
         ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
···
                 block_rsv_add_bytes(block_rsv, num_bytes, 0);
                 trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                               btrfs_ino(inode), num_bytes, 1);
-        }
+
+                /* Don't forget to increase qgroup_rsv_reserved */
+                spin_lock(&block_rsv->lock);
+                block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+                spin_unlock(&block_rsv->lock);
+        } else
+                btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
         return ret;
 }
···
         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
         struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
         u64 released = 0;
+        u64 qgroup_to_release = 0;

         /*
          * Since we statically set the block_rsv->size we just want to say we
          * are releasing 0 bytes, and then we'll just get the reservation over
          * the size free'd.
          */
-        released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
+        released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
+                                           &qgroup_to_release);
         if (released > 0)
                 trace_btrfs_space_reservation(fs_info, "delalloc",
                                               btrfs_ino(inode), released, 0);
         if (qgroup_free)
-                btrfs_qgroup_free_meta_prealloc(inode->root, released);
+                btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
         else
-                btrfs_qgroup_convert_reserved_meta(inode->root, released);
+                btrfs_qgroup_convert_reserved_meta(inode->root,
+                                                   qgroup_to_release);
 }

 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
···
         if (global_rsv == block_rsv ||
             block_rsv->space_info != global_rsv->space_info)
                 global_rsv = NULL;
-        block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
+        block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
 }

 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
···
 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
         block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
-                                (u64)-1);
+                                (u64)-1, NULL);
         WARN_ON(fs_info->trans_block_rsv.size > 0);
         WARN_ON(fs_info->trans_block_rsv.reserved > 0);
         WARN_ON(fs_info->chunk_block_rsv.size > 0);
···
         WARN_ON_ONCE(!list_empty(&trans->new_bgs));

         block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
-                                trans->chunk_bytes_reserved);
+                                trans->chunk_bytes_reserved, NULL);
         trans->chunk_bytes_reserved = 0;
 }
···
 {
         struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
         u64 reserve_size = 0;
+        u64 qgroup_rsv_size = 0;
         u64 csum_leaves;
         unsigned outstanding_extents;
···
                                          inode->csum_bytes);
         reserve_size += btrfs_calc_trans_metadata_size(fs_info,
                                                        csum_leaves);
+        /*
+         * For qgroup rsv, the calculation is very simple:
+         * account one nodesize for each outstanding extent
+         *
+         * This is overestimating in most cases.
+         */
+        qgroup_rsv_size = outstanding_extents * fs_info->nodesize;

         spin_lock(&block_rsv->lock);
         block_rsv->size = reserve_size;
+        block_rsv->qgroup_rsv_size = qgroup_rsv_size;
         spin_unlock(&block_rsv->lock);
 }
···
                              struct btrfs_block_rsv *block_rsv, u32 blocksize)
 {
         block_rsv_add_bytes(block_rsv, blocksize, 0);
-        block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
+        block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
 }

 /*
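
Editorial note: the extent-tree.c changes above size the per-inode qgroup reservation as one nodesize per outstanding extent, independently of the much larger classic space reservation. A small standalone sketch of that difference follows; it is plain userspace C with made-up sample values (16 KiB nodesize, a simplified stand-in for btrfs_calc_trans_metadata_size()), not kernel code.

/*
 * Userspace sketch only. The "classic" estimate below is a simplified
 * stand-in for btrfs_calc_trans_metadata_size(); all numbers are samples.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint64_t nodesize = 16 * 1024;    /* sample nodesize */
        const uint64_t max_level = 8;           /* worst-case depth stand-in */
        const unsigned outstanding_extents = 4; /* pending delalloc extents */
        const uint64_t csum_leaves = 1;         /* sample csum leaf count */

        /* classic space rsv: scales with worst-case tree blocks touched */
        uint64_t space_rsv = (outstanding_extents + csum_leaves) *
                             max_level * nodesize;

        /* new qgroup rsv: just one nodesize per outstanding extent */
        uint64_t qgroup_rsv = (uint64_t)outstanding_extents * nodesize;

        printf("space rsv (worst case): %llu bytes\n",
               (unsigned long long)space_rsv);
        printf("qgroup rsv (net usage): %llu bytes\n",
               (unsigned long long)qgroup_rsv);
        return 0;
}

Tracking the two amounts separately is what lets btrfs_inode_rsv_release() return only the qgroup excess instead of the full released byte count.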

fs/btrfs/file.c (+1 -1)

···
                 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                      lockstart, lockend, &cached_state);
                 btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
-                                               (ret != 0));
+                                               true);
                 if (ret) {
                         btrfs_drop_pages(pages, num_pages);
                         break;

fs/btrfs/inode.c (+12 -8)

···
 #include <linux/uio.h>
 #include <linux/magic.h>
 #include <linux/iversion.h>
+#include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
···
                 struct dir_entry *entry = addr;
                 char *name = (char *)(entry + 1);

-                ctx->pos = entry->offset;
-                if (!dir_emit(ctx, name, entry->name_len, entry->ino,
-                              entry->type))
+                ctx->pos = get_unaligned(&entry->offset);
+                if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
+                                         get_unaligned(&entry->ino),
+                                         get_unaligned(&entry->type)))
                         return 1;
-                addr += sizeof(struct dir_entry) + entry->name_len;
+                addr += sizeof(struct dir_entry) +
+                        get_unaligned(&entry->name_len);
                 ctx->pos++;
         }
         return 0;
···
                 }

                 entry = addr;
-                entry->name_len = name_len;
+                put_unaligned(name_len, &entry->name_len);
                 name_ptr = (char *)(entry + 1);
                 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
                                    name_len);
-                entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+                put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+                                &entry->type);
                 btrfs_dir_item_key_to_cpu(leaf, di, &location);
-                entry->ino = location.objectid;
-                entry->offset = found_key.offset;
+                put_unaligned(location.objectid, &entry->ino);
+                put_unaligned(found_key.offset, &entry->offset);
                 entries++;
                 addr += sizeof(struct dir_entry) + name_len;
                 total_len += sizeof(struct dir_entry) + name_len;
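
Editorial note: the readdir buffer packs dir_entry records back to back with their variable-length names, so any record after the first can start at an odd offset; that is why the fields now go through get_unaligned()/put_unaligned(). The sketch below shows the same situation in plain userspace C, with memcpy() standing in for the kernel helpers and a simplified struct layout; it is illustrative only, not the btrfs code.

/*
 * Userspace sketch: packed records with variable-length names can leave
 * the next record's u64 fields at a misaligned address. memcpy() is the
 * portable stand-in for get_unaligned()/put_unaligned().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dir_entry {              /* simplified stand-in for the btrfs struct */
        uint64_t ino;
        uint64_t offset;
        unsigned type;
        int name_len;
};

int main(void)
{
        unsigned char buf[256];
        unsigned char *addr = buf;
        struct dir_entry first = { .ino = 256, .offset = 2,
                                   .type = 1, .name_len = 3 };
        struct dir_entry second = { .ino = 257, .offset = 3,
                                    .type = 1, .name_len = 4 };
        struct dir_entry out;

        /* pack record + 3-byte name: the next record starts at offset 27,
         * which is not 8-byte aligned */
        memcpy(addr, &first, sizeof(first));            /* put_unaligned() */
        memcpy(addr + sizeof(first), "foo", 3);
        addr += sizeof(first) + first.name_len;

        memcpy(addr, &second, sizeof(second));          /* misaligned store */
        memcpy(addr + sizeof(second), "barz", 4);

        /* a plain *(struct dir_entry *)addr dereference here could fault on
         * strict-alignment CPUs; copy the bytes out instead */
        memcpy(&out, addr, sizeof(out));                /* get_unaligned() */
        printf("ino=%llu name_len=%d\n",
               (unsigned long long)out.ino, out.name_len);
        return 0;
}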

fs/btrfs/print-tree.c (+15 -10)

···
         fs_info = l->fs_info;
         nr = btrfs_header_nritems(l);

-        btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d",
-                   btrfs_header_bytenr(l), nr,
-                   btrfs_leaf_free_space(fs_info, l));
+        btrfs_info(fs_info,
+                   "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
+                   btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
+                   btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
         for (i = 0 ; i < nr ; i++) {
                 item = btrfs_item_nr(i);
                 btrfs_item_key_to_cpu(l, &key, i);
···
         }
 }

-void btrfs_print_tree(struct extent_buffer *c)
+void btrfs_print_tree(struct extent_buffer *c, bool follow)
 {
         struct btrfs_fs_info *fs_info;
         int i; u32 nr;
···
                 return;
         }
         btrfs_info(fs_info,
-                   "node %llu level %d total ptrs %d free spc %u",
-                   btrfs_header_bytenr(c), level, nr,
-                   (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr);
+                   "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu",
+                   btrfs_header_bytenr(c), level, btrfs_header_generation(c),
+                   nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
+                   btrfs_header_owner(c));
         for (i = 0; i < nr; i++) {
                 btrfs_node_key_to_cpu(c, &key, i);
-                pr_info("\tkey %d (%llu %u %llu) block %llu\n",
+                pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
                        i, key.objectid, key.type, key.offset,
-                       btrfs_node_blockptr(c, i));
+                       btrfs_node_blockptr(c, i),
+                       btrfs_node_ptr_generation(c, i));
         }
+        if (!follow)
+                return;
         for (i = 0; i < nr; i++) {
                 struct btrfs_key first_key;
                 struct extent_buffer *next;
···
                 if (btrfs_header_level(next) !=
                        level - 1)
                         BUG();
-                btrfs_print_tree(next);
+                btrfs_print_tree(next, follow);
                 free_extent_buffer(next);
         }
 }

fs/btrfs/print-tree.h (+1 -1)

···
 #define BTRFS_PRINT_TREE_H

 void btrfs_print_leaf(struct extent_buffer *l);
-void btrfs_print_tree(struct extent_buffer *c);
+void btrfs_print_tree(struct extent_buffer *c, bool follow);

 #endif

fs/btrfs/qgroup.c (+41 -2)

···
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
+#include <linux/sizes.h>

 #include "ctree.h"
 #include "transaction.h"
···
         return ret;
 }

-static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+/*
+ * Two limits to commit transaction in advance.
+ *
+ * For RATIO, it will be 1/RATIO of the remaining limit
+ * (excluding data and prealloc meta) as threshold.
+ * For SIZE, it will be in byte unit as threshold.
+ */
+#define QGROUP_PERTRANS_RATIO           32
+#define QGROUP_PERTRANS_SIZE            SZ_32M
+static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
+                                const struct btrfs_qgroup *qg, u64 num_bytes)
 {
+        u64 limit;
+        u64 threshold;
+
         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
             qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
                 return false;
···
         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
             qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
                 return false;
+
+        /*
+         * Even if we passed the check, it's better to check if reservation
+         * for meta_pertrans is pushing us near limit.
+         * If there is too much pertrans reservation or it's near the limit,
+         * let's try commit transaction to free some, using transaction_kthread
+         */
+        if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
+                              BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
+                if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+                        limit = qg->max_excl;
+                else
+                        limit = qg->max_rfer;
+                threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
+                            qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
+                            QGROUP_PERTRANS_RATIO;
+                threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
+
+                /*
+                 * Use transaction_kthread to commit transaction, so we no
+                 * longer need to bother nested transaction nor lock context.
+                 */
+                if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
+                        btrfs_commit_transaction_locksafe(fs_info);
+        }

         return true;
 }
···

                 qg = unode_aux_to_qgroup(unode);

-                if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+                if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
                         ret = -EDQUOT;
                         goto out;
                 }
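
Editorial note, with made-up numbers rather than anything from the patch: a qgroup capped at 1 GiB exclusive, carrying 100 MiB of data reservation and 50 MiB of prealloc metadata reservation, gets a threshold of min((1 GiB - 150 MiB) / 32, 32 MiB), roughly 27 MiB; once per-transaction metadata reservations exceed that, the transaction kthread is asked to commit early. A standalone C sketch of the same arithmetic:

/* Userspace sketch of the early-commit threshold; sample values only. */
#include <stdint.h>
#include <stdio.h>

#define QGROUP_PERTRANS_RATIO   32
#define QGROUP_PERTRANS_SIZE    (32ULL * 1024 * 1024)   /* SZ_32M */

static uint64_t min_u64(uint64_t a, uint64_t b)
{
        return a < b ? a : b;
}

int main(void)
{
        uint64_t limit = 1024ULL * 1024 * 1024;         /* sample max_excl */
        uint64_t data_rsv = 100ULL * 1024 * 1024;       /* sample data rsv */
        uint64_t prealloc_rsv = 50ULL * 1024 * 1024;    /* sample prealloc */
        uint64_t pertrans_rsv = 40ULL * 1024 * 1024;    /* sample pertrans */

        uint64_t threshold = min_u64((limit - data_rsv - prealloc_rsv) /
                                     QGROUP_PERTRANS_RATIO,
                                     QGROUP_PERTRANS_SIZE);

        printf("threshold = %llu bytes\n", (unsigned long long)threshold);
        if (pertrans_rsv > threshold)
                printf("pertrans rsv over threshold: request early commit\n");
        return 0;
}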

fs/btrfs/transaction.c (+1)

···
          */
         cur_trans->state = TRANS_STATE_COMPLETED;
         wake_up(&cur_trans->commit_wait);
+        clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);

         spin_lock(&fs_info->trans_lock);
         list_del_init(&cur_trans->list);

fs/btrfs/transaction.h (+14)

···
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
                                    int wait_for_unblock);
+
+/*
+ * Try to commit transaction asynchronously, so this is safe to call
+ * even holding a spinlock.
+ *
+ * It's done by informing transaction_kthread to commit transaction without
+ * waiting for commit interval.
+ */
+static inline void btrfs_commit_transaction_locksafe(
+                struct btrfs_fs_info *fs_info)
+{
+        set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
+        wake_up_process(fs_info->transaction_kthread);
+}
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
 void btrfs_throttle(struct btrfs_fs_info *fs_info);
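
Editorial note: the mechanism is deliberately minimal so it can be used from any context: set a bit, wake the already-running transaction kthread, and let that thread do the blocking commit (disk-io.c above skips the commit_interval wait when the bit is set, and transaction.c clears it once the commit completes). Below is a loose userspace analogy using pthreads, purely illustrative and not derived from the kernel sources; note the kernel version only does set_bit() plus wake_up_process(), while the mutex here exists just to make the condvar usage correct.

/* Userspace analogy: request an "early commit" without doing it yourself. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool need_commit;
static bool stop;

/* worker: wakes every "commit_interval" seconds, or immediately on request */
static void *transaction_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!stop) {
                struct timespec deadline;

                clock_gettime(CLOCK_REALTIME, &deadline);
                deadline.tv_sec += 30;          /* stand-in commit_interval */
                while (!need_commit && !stop) {
                        if (pthread_cond_timedwait(&cond, &lock, &deadline))
                                break;          /* interval elapsed */
                }
                if (stop)
                        break;
                printf(need_commit ? "early commit\n" : "periodic commit\n");
                need_commit = false;            /* like clearing the bit */
        }
        pthread_mutex_unlock(&lock);
        return NULL;
}

/* analogue of btrfs_commit_transaction_locksafe(): flag + wake, no waiting */
static void request_commit(void)
{
        pthread_mutex_lock(&lock);
        need_commit = true;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        pthread_t worker;

        pthread_create(&worker, NULL, transaction_worker, NULL);
        sleep(1);
        request_commit();
        sleep(1);

        pthread_mutex_lock(&lock);
        stop = true;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        pthread_join(worker, NULL);
        return 0;
}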