Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
Btrfs: fix file clone ioctl for bookend extents
Btrfs: fix uninit compiler warning in cow_file_range_nocow
Btrfs: constify dentry_operations
Btrfs: optimize back reference update during btrfs_drop_snapshot
Btrfs: remove negative dentry when deleting subvolume
Btrfs: optimize fsync for the single writer case
Btrfs: async delalloc flushing under space pressure
Btrfs: release delalloc reservations on extent item insertion
Btrfs: delay clearing EXTENT_DELALLOC for compressed extents
Btrfs: cleanup extent_clear_unlock_delalloc flags
Btrfs: fix possible softlockup in the allocator
Btrfs: fix deadlock on async thread startup

+447 -157
+73 -8
fs/btrfs/async-thread.c
··· 64 64 }; 65 65 66 66 /* 67 + * btrfs_start_workers uses kthread_run, which can block waiting for memory 68 + * for a very long time. It will actually throttle on page writeback, 69 + * and so it may not make progress until after our btrfs worker threads 70 + * process all of the pending work structs in their queue 71 + * 72 + * This means we can't use btrfs_start_workers from inside a btrfs worker 73 + * thread that is used as part of cleaning dirty memory, which pretty much 74 + * involves all of the worker threads. 75 + * 76 + * Instead we have a helper queue who never has more than one thread 77 + * where we scheduler thread start operations. This worker_start struct 78 + * is used to contain the work and hold a pointer to the queue that needs 79 + * another worker. 80 + */ 81 + struct worker_start { 82 + struct btrfs_work work; 83 + struct btrfs_workers *queue; 84 + }; 85 + 86 + static void start_new_worker_func(struct btrfs_work *work) 87 + { 88 + struct worker_start *start; 89 + start = container_of(work, struct worker_start, work); 90 + btrfs_start_workers(start->queue, 1); 91 + kfree(start); 92 + } 93 + 94 + static int start_new_worker(struct btrfs_workers *queue) 95 + { 96 + struct worker_start *start; 97 + int ret; 98 + 99 + start = kzalloc(sizeof(*start), GFP_NOFS); 100 + if (!start) 101 + return -ENOMEM; 102 + 103 + start->work.func = start_new_worker_func; 104 + start->queue = queue; 105 + ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work); 106 + if (ret) 107 + kfree(start); 108 + return ret; 109 + } 110 + 111 + /* 67 112 * helper function to move a thread onto the idle list after it 68 113 * has finished some requests. 
69 114 */ ··· 163 118 goto out; 164 119 165 120 workers->atomic_start_pending = 0; 166 - if (workers->num_workers >= workers->max_workers) 121 + if (workers->num_workers + workers->num_workers_starting >= 122 + workers->max_workers) 167 123 goto out; 168 124 125 + workers->num_workers_starting += 1; 169 126 spin_unlock_irqrestore(&workers->lock, flags); 170 - btrfs_start_workers(workers, 1); 127 + start_new_worker(workers); 171 128 return; 172 129 173 130 out: ··· 437 390 /* 438 391 * simple init on struct btrfs_workers 439 392 */ 440 - void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) 393 + void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, 394 + struct btrfs_workers *async_helper) 441 395 { 442 396 workers->num_workers = 0; 397 + workers->num_workers_starting = 0; 443 398 INIT_LIST_HEAD(&workers->worker_list); 444 399 INIT_LIST_HEAD(&workers->idle_list); 445 400 INIT_LIST_HEAD(&workers->order_list); ··· 453 404 workers->name = name; 454 405 workers->ordered = 0; 455 406 workers->atomic_start_pending = 0; 456 - workers->atomic_worker_start = 0; 407 + workers->atomic_worker_start = async_helper; 457 408 } 458 409 459 410 /* 460 411 * starts new worker threads. This does not enforce the max worker 461 412 * count in case you need to temporarily go past it. 
462 413 */ 463 - int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 414 + static int __btrfs_start_workers(struct btrfs_workers *workers, 415 + int num_workers) 464 416 { 465 417 struct btrfs_worker_thread *worker; 466 418 int ret = 0; ··· 494 444 list_add_tail(&worker->worker_list, &workers->idle_list); 495 445 worker->idle = 1; 496 446 workers->num_workers++; 447 + workers->num_workers_starting--; 448 + WARN_ON(workers->num_workers_starting < 0); 497 449 spin_unlock_irq(&workers->lock); 498 450 } 499 451 return 0; 500 452 fail: 501 453 btrfs_stop_workers(workers); 502 454 return ret; 455 + } 456 + 457 + int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 458 + { 459 + spin_lock_irq(&workers->lock); 460 + workers->num_workers_starting += num_workers; 461 + spin_unlock_irq(&workers->lock); 462 + return __btrfs_start_workers(workers, num_workers); 503 463 } 504 464 505 465 /* ··· 521 461 { 522 462 struct btrfs_worker_thread *worker; 523 463 struct list_head *next; 524 - int enforce_min = workers->num_workers < workers->max_workers; 464 + int enforce_min; 465 + 466 + enforce_min = (workers->num_workers + workers->num_workers_starting) < 467 + workers->max_workers; 525 468 526 469 /* 527 470 * if we find an idle thread, don't move it to the end of the ··· 572 509 worker = next_worker(workers); 573 510 574 511 if (!worker) { 575 - if (workers->num_workers >= workers->max_workers) { 512 + if (workers->num_workers + workers->num_workers_starting >= 513 + workers->max_workers) { 576 514 goto fallback; 577 515 } else if (workers->atomic_worker_start) { 578 516 workers->atomic_start_pending = 1; 579 517 goto fallback; 580 518 } else { 519 + workers->num_workers_starting++; 581 520 spin_unlock_irqrestore(&workers->lock, flags); 582 521 /* we're below the limit, start another worker */ 583 - btrfs_start_workers(workers, 1); 522 + __btrfs_start_workers(workers, 1); 584 523 goto again; 585 524 } 586 525 }
+7 -3
fs/btrfs/async-thread.h
··· 64 64 /* current number of running workers */ 65 65 int num_workers; 66 66 67 + int num_workers_starting; 68 + 67 69 /* max number of workers allowed. changed by btrfs_start_workers */ 68 70 int max_workers; 69 71 ··· 80 78 81 79 /* 82 80 * are we allowed to sleep while starting workers or are we required 83 - * to start them at a later time? 81 + * to start them at a later time? If we can't sleep, this indicates 82 + * which queue we need to use to schedule thread creation. 84 83 */ 85 - int atomic_worker_start; 84 + struct btrfs_workers *atomic_worker_start; 86 85 87 86 /* list with all the work threads. The workers on the idle thread 88 87 * may be actively servicing jobs, but they haven't yet hit the ··· 112 109 int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 113 110 int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); 114 111 int btrfs_stop_workers(struct btrfs_workers *workers); 115 - void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); 112 + void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, 113 + struct btrfs_workers *async_starter); 116 114 int btrfs_requeue_work(struct btrfs_work *work); 117 115 void btrfs_set_work_high_prio(struct btrfs_work *work); 118 116 #endif
+7 -5
fs/btrfs/btrfs_inode.h
··· 128 128 u64 last_unlink_trans; 129 129 130 130 /* 131 - * These two counters are for delalloc metadata reservations. We keep 132 - * track of how many extents we've accounted for vs how many extents we 133 - * have. 131 + * Counters to keep track of the number of extent item's we may use due 132 + * to delalloc and such. outstanding_extents is the number of extent 133 + * items we think we'll end up using, and reserved_extents is the number 134 + * of extent items we've reserved metadata for. 134 135 */ 135 - int delalloc_reserved_extents; 136 - int delalloc_extents; 136 + spinlock_t accounting_lock; 137 + int reserved_extents; 138 + int outstanding_extents; 137 139 138 140 /* 139 141 * ordered_data_close is set by truncate when a file that used
+11 -4
fs/btrfs/ctree.h
··· 691 691 692 692 struct list_head list; 693 693 694 + /* for controlling how we free up space for allocations */ 695 + wait_queue_head_t allocate_wait; 696 + wait_queue_head_t flush_wait; 697 + int allocating_chunk; 698 + int flushing; 699 + 694 700 /* for block groups in our same type */ 695 701 struct list_head block_groups; 696 702 spinlock_t lock; 697 703 struct rw_semaphore groups_sem; 698 704 atomic_t caching_threads; 699 - 700 - int allocating_chunk; 701 - wait_queue_head_t wait; 702 705 }; 703 706 704 707 /* ··· 910 907 * A third pool does submit_bio to avoid deadlocking with the other 911 908 * two 912 909 */ 910 + struct btrfs_workers generic_worker; 913 911 struct btrfs_workers workers; 914 912 struct btrfs_workers delalloc_workers; 915 913 struct btrfs_workers endio_workers; ··· 918 914 struct btrfs_workers endio_meta_write_workers; 919 915 struct btrfs_workers endio_write_workers; 920 916 struct btrfs_workers submit_workers; 917 + struct btrfs_workers enospc_workers; 921 918 /* 922 919 * fixup workers take dirty pages that didn't properly go through 923 920 * the cow mechanism and make them safe to write. It happens ··· 1010 1005 atomic_t log_commit[2]; 1011 1006 unsigned long log_transid; 1012 1007 unsigned long log_batch; 1008 + pid_t log_start_pid; 1009 + bool log_multiple_pids; 1013 1010 1014 1011 u64 objectid; 1015 1012 u64 last_trans; ··· 2330 2323 void btrfs_orphan_cleanup(struct btrfs_root *root); 2331 2324 int btrfs_cont_expand(struct inode *inode, loff_t size); 2332 2325 int btrfs_invalidate_inodes(struct btrfs_root *root); 2333 - extern struct dentry_operations btrfs_dentry_operations; 2326 + extern const struct dentry_operations btrfs_dentry_operations; 2334 2327 2335 2328 /* ioctl.c */ 2336 2329 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+29 -19
fs/btrfs/disk-io.c
··· 1746 1746 err = -EINVAL; 1747 1747 goto fail_iput; 1748 1748 } 1749 - printk("thread pool is %d\n", fs_info->thread_pool_size); 1750 - /* 1751 - * we need to start all the end_io workers up front because the 1752 - * queue work function gets called at interrupt time, and so it 1753 - * cannot dynamically grow. 1754 - */ 1749 + 1750 + btrfs_init_workers(&fs_info->generic_worker, 1751 + "genwork", 1, NULL); 1752 + 1755 1753 btrfs_init_workers(&fs_info->workers, "worker", 1756 - fs_info->thread_pool_size); 1754 + fs_info->thread_pool_size, 1755 + &fs_info->generic_worker); 1757 1756 1758 1757 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", 1759 - fs_info->thread_pool_size); 1758 + fs_info->thread_pool_size, 1759 + &fs_info->generic_worker); 1760 1760 1761 1761 btrfs_init_workers(&fs_info->submit_workers, "submit", 1762 1762 min_t(u64, fs_devices->num_devices, 1763 - fs_info->thread_pool_size)); 1763 + fs_info->thread_pool_size), 1764 + &fs_info->generic_worker); 1765 + btrfs_init_workers(&fs_info->enospc_workers, "enospc", 1766 + fs_info->thread_pool_size, 1767 + &fs_info->generic_worker); 1764 1768 1765 1769 /* a higher idle thresh on the submit workers makes it much more 1766 1770 * likely that bios will be send down in a sane order to the ··· 1778 1774 fs_info->delalloc_workers.idle_thresh = 2; 1779 1775 fs_info->delalloc_workers.ordered = 1; 1780 1776 1781 - btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); 1777 + btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, 1778 + &fs_info->generic_worker); 1782 1779 btrfs_init_workers(&fs_info->endio_workers, "endio", 1783 - fs_info->thread_pool_size); 1780 + fs_info->thread_pool_size, 1781 + &fs_info->generic_worker); 1784 1782 btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", 1785 - fs_info->thread_pool_size); 1783 + fs_info->thread_pool_size, 1784 + &fs_info->generic_worker); 1786 1785 btrfs_init_workers(&fs_info->endio_meta_write_workers, 1787 - "endio-meta-write", 
fs_info->thread_pool_size); 1786 + "endio-meta-write", fs_info->thread_pool_size, 1787 + &fs_info->generic_worker); 1788 1788 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1789 - fs_info->thread_pool_size); 1789 + fs_info->thread_pool_size, 1790 + &fs_info->generic_worker); 1790 1791 1791 1792 /* 1792 1793 * endios are largely parallel and should have a very ··· 1803 1794 fs_info->endio_write_workers.idle_thresh = 2; 1804 1795 fs_info->endio_meta_write_workers.idle_thresh = 2; 1805 1796 1806 - fs_info->endio_workers.atomic_worker_start = 1; 1807 - fs_info->endio_meta_workers.atomic_worker_start = 1; 1808 - fs_info->endio_write_workers.atomic_worker_start = 1; 1809 - fs_info->endio_meta_write_workers.atomic_worker_start = 1; 1810 - 1811 1797 btrfs_start_workers(&fs_info->workers, 1); 1798 + btrfs_start_workers(&fs_info->generic_worker, 1); 1812 1799 btrfs_start_workers(&fs_info->submit_workers, 1); 1813 1800 btrfs_start_workers(&fs_info->delalloc_workers, 1); 1814 1801 btrfs_start_workers(&fs_info->fixup_workers, 1); ··· 1812 1807 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1813 1808 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1814 1809 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1810 + btrfs_start_workers(&fs_info->enospc_workers, 1); 1815 1811 1816 1812 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1817 1813 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, ··· 2018 2012 free_extent_buffer(chunk_root->node); 2019 2013 free_extent_buffer(chunk_root->commit_root); 2020 2014 fail_sb_buffer: 2015 + btrfs_stop_workers(&fs_info->generic_worker); 2021 2016 btrfs_stop_workers(&fs_info->fixup_workers); 2022 2017 btrfs_stop_workers(&fs_info->delalloc_workers); 2023 2018 btrfs_stop_workers(&fs_info->workers); ··· 2027 2020 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2028 2021 btrfs_stop_workers(&fs_info->endio_write_workers); 2029 2022 btrfs_stop_workers(&fs_info->submit_workers); 2023 + 
btrfs_stop_workers(&fs_info->enospc_workers); 2030 2024 fail_iput: 2031 2025 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2032 2026 iput(fs_info->btree_inode); ··· 2445 2437 2446 2438 iput(fs_info->btree_inode); 2447 2439 2440 + btrfs_stop_workers(&fs_info->generic_worker); 2448 2441 btrfs_stop_workers(&fs_info->fixup_workers); 2449 2442 btrfs_stop_workers(&fs_info->delalloc_workers); 2450 2443 btrfs_stop_workers(&fs_info->workers); ··· 2454 2445 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2455 2446 btrfs_stop_workers(&fs_info->endio_write_workers); 2456 2447 btrfs_stop_workers(&fs_info->submit_workers); 2448 + btrfs_stop_workers(&fs_info->enospc_workers); 2457 2449 2458 2450 btrfs_close_devices(fs_info->fs_devices); 2459 2451 btrfs_mapping_tree_free(&fs_info->mapping_tree);
+186 -49
fs/btrfs/extent-tree.c
··· 2824 2824 num_items); 2825 2825 2826 2826 spin_lock(&meta_sinfo->lock); 2827 - if (BTRFS_I(inode)->delalloc_reserved_extents <= 2828 - BTRFS_I(inode)->delalloc_extents) { 2827 + spin_lock(&BTRFS_I(inode)->accounting_lock); 2828 + if (BTRFS_I(inode)->reserved_extents <= 2829 + BTRFS_I(inode)->outstanding_extents) { 2830 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 2829 2831 spin_unlock(&meta_sinfo->lock); 2830 2832 return 0; 2831 2833 } 2834 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 2832 2835 2833 - BTRFS_I(inode)->delalloc_reserved_extents--; 2834 - BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); 2836 + BTRFS_I(inode)->reserved_extents--; 2837 + BUG_ON(BTRFS_I(inode)->reserved_extents < 0); 2835 2838 2836 2839 if (meta_sinfo->bytes_delalloc < num_bytes) { 2837 2840 bug = true; ··· 2867 2864 meta_sinfo->force_delalloc = 0; 2868 2865 } 2869 2866 2867 + struct async_flush { 2868 + struct btrfs_root *root; 2869 + struct btrfs_space_info *info; 2870 + struct btrfs_work work; 2871 + }; 2872 + 2873 + static noinline void flush_delalloc_async(struct btrfs_work *work) 2874 + { 2875 + struct async_flush *async; 2876 + struct btrfs_root *root; 2877 + struct btrfs_space_info *info; 2878 + 2879 + async = container_of(work, struct async_flush, work); 2880 + root = async->root; 2881 + info = async->info; 2882 + 2883 + btrfs_start_delalloc_inodes(root); 2884 + wake_up(&info->flush_wait); 2885 + btrfs_wait_ordered_extents(root, 0); 2886 + 2887 + spin_lock(&info->lock); 2888 + info->flushing = 0; 2889 + spin_unlock(&info->lock); 2890 + wake_up(&info->flush_wait); 2891 + 2892 + kfree(async); 2893 + } 2894 + 2895 + static void wait_on_flush(struct btrfs_space_info *info) 2896 + { 2897 + DEFINE_WAIT(wait); 2898 + u64 used; 2899 + 2900 + while (1) { 2901 + prepare_to_wait(&info->flush_wait, &wait, 2902 + TASK_UNINTERRUPTIBLE); 2903 + spin_lock(&info->lock); 2904 + if (!info->flushing) { 2905 + spin_unlock(&info->lock); 2906 + break; 2907 + } 2908 + 2909 + used = 
info->bytes_used + info->bytes_reserved + 2910 + info->bytes_pinned + info->bytes_readonly + 2911 + info->bytes_super + info->bytes_root + 2912 + info->bytes_may_use + info->bytes_delalloc; 2913 + if (used < info->total_bytes) { 2914 + spin_unlock(&info->lock); 2915 + break; 2916 + } 2917 + spin_unlock(&info->lock); 2918 + schedule(); 2919 + } 2920 + finish_wait(&info->flush_wait, &wait); 2921 + } 2922 + 2923 + static void flush_delalloc(struct btrfs_root *root, 2924 + struct btrfs_space_info *info) 2925 + { 2926 + struct async_flush *async; 2927 + bool wait = false; 2928 + 2929 + spin_lock(&info->lock); 2930 + 2931 + if (!info->flushing) { 2932 + info->flushing = 1; 2933 + init_waitqueue_head(&info->flush_wait); 2934 + } else { 2935 + wait = true; 2936 + } 2937 + 2938 + spin_unlock(&info->lock); 2939 + 2940 + if (wait) { 2941 + wait_on_flush(info); 2942 + return; 2943 + } 2944 + 2945 + async = kzalloc(sizeof(*async), GFP_NOFS); 2946 + if (!async) 2947 + goto flush; 2948 + 2949 + async->root = root; 2950 + async->info = info; 2951 + async->work.func = flush_delalloc_async; 2952 + 2953 + btrfs_queue_worker(&root->fs_info->enospc_workers, 2954 + &async->work); 2955 + wait_on_flush(info); 2956 + return; 2957 + 2958 + flush: 2959 + btrfs_start_delalloc_inodes(root); 2960 + btrfs_wait_ordered_extents(root, 0); 2961 + 2962 + spin_lock(&info->lock); 2963 + info->flushing = 0; 2964 + spin_unlock(&info->lock); 2965 + wake_up(&info->flush_wait); 2966 + } 2967 + 2870 2968 static int maybe_allocate_chunk(struct btrfs_root *root, 2871 2969 struct btrfs_space_info *info) 2872 2970 { ··· 2998 2894 if (!info->allocating_chunk) { 2999 2895 info->force_alloc = 1; 3000 2896 info->allocating_chunk = 1; 3001 - init_waitqueue_head(&info->wait); 2897 + init_waitqueue_head(&info->allocate_wait); 3002 2898 } else { 3003 2899 wait = true; 3004 2900 } ··· 3006 2902 spin_unlock(&info->lock); 3007 2903 3008 2904 if (wait) { 3009 - wait_event(info->wait, 2905 + wait_event(info->allocate_wait, 
3010 2906 !info->allocating_chunk); 3011 2907 return 1; 3012 2908 } ··· 3027 2923 spin_lock(&info->lock); 3028 2924 info->allocating_chunk = 0; 3029 2925 spin_unlock(&info->lock); 3030 - wake_up(&info->wait); 2926 + wake_up(&info->allocate_wait); 3031 2927 3032 2928 if (ret) 3033 2929 return 0; ··· 3085 2981 filemap_flush(inode->i_mapping); 3086 2982 goto again; 3087 2983 } else if (flushed == 3) { 3088 - btrfs_start_delalloc_inodes(root); 3089 - btrfs_wait_ordered_extents(root, 0); 2984 + flush_delalloc(root, meta_sinfo); 3090 2985 goto again; 3091 2986 } 3092 2987 spin_lock(&meta_sinfo->lock); 3093 2988 meta_sinfo->bytes_delalloc -= num_bytes; 3094 2989 spin_unlock(&meta_sinfo->lock); 3095 2990 printk(KERN_ERR "enospc, has %d, reserved %d\n", 3096 - BTRFS_I(inode)->delalloc_extents, 3097 - BTRFS_I(inode)->delalloc_reserved_extents); 2991 + BTRFS_I(inode)->outstanding_extents, 2992 + BTRFS_I(inode)->reserved_extents); 3098 2993 dump_space_info(meta_sinfo, 0, 0); 3099 2994 return -ENOSPC; 3100 2995 } 3101 2996 3102 - BTRFS_I(inode)->delalloc_reserved_extents++; 2997 + BTRFS_I(inode)->reserved_extents++; 3103 2998 check_force_delalloc(meta_sinfo); 3104 2999 spin_unlock(&meta_sinfo->lock); 3105 3000 ··· 3197 3094 } 3198 3095 3199 3096 if (retries == 2) { 3200 - btrfs_start_delalloc_inodes(root); 3201 - btrfs_wait_ordered_extents(root, 0); 3097 + flush_delalloc(root, meta_sinfo); 3202 3098 goto again; 3203 3099 } 3204 3100 spin_lock(&meta_sinfo->lock); ··· 4131 4029 int loop = 0; 4132 4030 bool found_uncached_bg = false; 4133 4031 bool failed_cluster_refill = false; 4032 + bool failed_alloc = false; 4134 4033 4135 4034 WARN_ON(num_bytes < root->sectorsize); 4136 4035 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); ··· 4336 4233 4337 4234 offset = btrfs_find_space_for_alloc(block_group, search_start, 4338 4235 num_bytes, empty_size); 4339 - if (!offset && (cached || (!cached && 4340 - loop == LOOP_CACHING_NOWAIT))) { 4341 - goto loop; 4342 - } else if (!offset && 
(!cached && 4343 - loop > LOOP_CACHING_NOWAIT)) { 4236 + /* 4237 + * If we didn't find a chunk, and we haven't failed on this 4238 + * block group before, and this block group is in the middle of 4239 + * caching and we are ok with waiting, then go ahead and wait 4240 + * for progress to be made, and set failed_alloc to true. 4241 + * 4242 + * If failed_alloc is true then we've already waited on this 4243 + * block group once and should move on to the next block group. 4244 + */ 4245 + if (!offset && !failed_alloc && !cached && 4246 + loop > LOOP_CACHING_NOWAIT) { 4344 4247 wait_block_group_cache_progress(block_group, 4345 - num_bytes + empty_size); 4248 + num_bytes + empty_size); 4249 + failed_alloc = true; 4346 4250 goto have_block_group; 4251 + } else if (!offset) { 4252 + goto loop; 4347 4253 } 4348 4254 checks: 4349 4255 search_start = stripe_align(root, offset); ··· 4400 4288 break; 4401 4289 loop: 4402 4290 failed_cluster_refill = false; 4291 + failed_alloc = false; 4403 4292 btrfs_put_block_group(block_group); 4404 4293 } 4405 4294 up_read(&space_info->groups_sem); ··· 4912 4799 u64 bytenr; 4913 4800 u64 generation; 4914 4801 u64 refs; 4802 + u64 flags; 4915 4803 u64 last = 0; 4916 4804 u32 nritems; 4917 4805 u32 blocksize; ··· 4950 4836 generation <= root->root_key.offset) 4951 4837 continue; 4952 4838 4839 + /* We don't lock the tree block, it's OK to be racy here */ 4840 + ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4841 + &refs, &flags); 4842 + BUG_ON(ret); 4843 + BUG_ON(refs == 0); 4844 + 4953 4845 if (wc->stage == DROP_REFERENCE) { 4954 - ret = btrfs_lookup_extent_info(trans, root, 4955 - bytenr, blocksize, 4956 - &refs, NULL); 4957 - BUG_ON(ret); 4958 - BUG_ON(refs == 0); 4959 4846 if (refs == 1) 4960 4847 goto reada; 4961 4848 4849 + if (wc->level == 1 && 4850 + (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 4851 + continue; 4962 4852 if (!wc->update_ref || 4963 4853 generation <= root->root_key.offset) 4964 4854 continue; ··· 4970 4852 
ret = btrfs_comp_cpu_keys(&key, 4971 4853 &wc->update_progress); 4972 4854 if (ret < 0) 4855 + continue; 4856 + } else { 4857 + if (wc->level == 1 && 4858 + (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 4973 4859 continue; 4974 4860 } 4975 4861 reada: ··· 4998 4876 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 4999 4877 struct btrfs_root *root, 5000 4878 struct btrfs_path *path, 5001 - struct walk_control *wc) 4879 + struct walk_control *wc, int lookup_info) 5002 4880 { 5003 4881 int level = wc->level; 5004 4882 struct extent_buffer *eb = path->nodes[level]; ··· 5013 4891 * when reference count of tree block is 1, it won't increase 5014 4892 * again. once full backref flag is set, we never clear it. 5015 4893 */ 5016 - if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 5017 - (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { 4894 + if (lookup_info && 4895 + ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 4896 + (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 5018 4897 BUG_ON(!path->locks[level]); 5019 4898 ret = btrfs_lookup_extent_info(trans, root, 5020 4899 eb->start, eb->len, ··· 5076 4953 static noinline int do_walk_down(struct btrfs_trans_handle *trans, 5077 4954 struct btrfs_root *root, 5078 4955 struct btrfs_path *path, 5079 - struct walk_control *wc) 4956 + struct walk_control *wc, int *lookup_info) 5080 4957 { 5081 4958 u64 bytenr; 5082 4959 u64 generation; ··· 5096 4973 * for the subtree 5097 4974 */ 5098 4975 if (wc->stage == UPDATE_BACKREF && 5099 - generation <= root->root_key.offset) 4976 + generation <= root->root_key.offset) { 4977 + *lookup_info = 1; 5100 4978 return 1; 4979 + } 5101 4980 5102 4981 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 5103 4982 blocksize = btrfs_level_size(root, level - 1); ··· 5112 4987 btrfs_tree_lock(next); 5113 4988 btrfs_set_lock_blocking(next); 5114 4989 5115 - if (wc->stage == DROP_REFERENCE) { 5116 - ret = 
btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5117 - &wc->refs[level - 1], 5118 - &wc->flags[level - 1]); 5119 - BUG_ON(ret); 5120 - BUG_ON(wc->refs[level - 1] == 0); 4990 + ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4991 + &wc->refs[level - 1], 4992 + &wc->flags[level - 1]); 4993 + BUG_ON(ret); 4994 + BUG_ON(wc->refs[level - 1] == 0); 4995 + *lookup_info = 0; 5121 4996 4997 + if (wc->stage == DROP_REFERENCE) { 5122 4998 if (wc->refs[level - 1] > 1) { 4999 + if (level == 1 && 5000 + (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5001 + goto skip; 5002 + 5123 5003 if (!wc->update_ref || 5124 5004 generation <= root->root_key.offset) 5125 5005 goto skip; ··· 5138 5008 wc->stage = UPDATE_BACKREF; 5139 5009 wc->shared_level = level - 1; 5140 5010 } 5011 + } else { 5012 + if (level == 1 && 5013 + (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5014 + goto skip; 5141 5015 } 5142 5016 5143 5017 if (!btrfs_buffer_uptodate(next, generation)) { 5144 5018 btrfs_tree_unlock(next); 5145 5019 free_extent_buffer(next); 5146 5020 next = NULL; 5021 + *lookup_info = 1; 5147 5022 } 5148 5023 5149 5024 if (!next) { ··· 5171 5036 skip: 5172 5037 wc->refs[level - 1] = 0; 5173 5038 wc->flags[level - 1] = 0; 5039 + if (wc->stage == DROP_REFERENCE) { 5040 + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5041 + parent = path->nodes[level]->start; 5042 + } else { 5043 + BUG_ON(root->root_key.objectid != 5044 + btrfs_header_owner(path->nodes[level])); 5045 + parent = 0; 5046 + } 5174 5047 5175 - if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5176 - parent = path->nodes[level]->start; 5177 - } else { 5178 - BUG_ON(root->root_key.objectid != 5179 - btrfs_header_owner(path->nodes[level])); 5180 - parent = 0; 5048 + ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 5049 + root->root_key.objectid, level - 1, 0); 5050 + BUG_ON(ret); 5181 5051 } 5182 - 5183 - ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 5184 - 
root->root_key.objectid, level - 1, 0); 5185 - BUG_ON(ret); 5186 - 5187 5052 btrfs_tree_unlock(next); 5188 5053 free_extent_buffer(next); 5054 + *lookup_info = 1; 5189 5055 return 1; 5190 5056 } 5191 5057 ··· 5300 5164 struct walk_control *wc) 5301 5165 { 5302 5166 int level = wc->level; 5167 + int lookup_info = 1; 5303 5168 int ret; 5304 5169 5305 5170 while (level >= 0) { ··· 5308 5171 btrfs_header_nritems(path->nodes[level])) 5309 5172 break; 5310 5173 5311 - ret = walk_down_proc(trans, root, path, wc); 5174 + ret = walk_down_proc(trans, root, path, wc, lookup_info); 5312 5175 if (ret > 0) 5313 5176 break; 5314 5177 5315 5178 if (level == 0) 5316 5179 break; 5317 5180 5318 - ret = do_walk_down(trans, root, path, wc); 5181 + ret = do_walk_down(trans, root, path, wc, &lookup_info); 5319 5182 if (ret > 0) { 5320 5183 path->slots[level]++; 5321 5184 continue;
+22 -20
fs/btrfs/extent_io.c
··· 460 460 struct extent_state *state, int bits, int wake, 461 461 int delete) 462 462 { 463 - int ret = state->state & bits; 463 + int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; 464 + int ret = state->state & bits_to_clear; 464 465 465 466 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 466 467 u64 range = state->end - state->start + 1; ··· 469 468 tree->dirty_bytes -= range; 470 469 } 471 470 clear_state_cb(tree, state, bits); 472 - state->state &= ~bits; 471 + state->state &= ~bits_to_clear; 473 472 if (wake) 474 473 wake_up(&state->wq); 475 474 if (delete || state->state == 0) { ··· 957 956 gfp_t mask) 958 957 { 959 958 return clear_extent_bit(tree, start, end, 960 - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 959 + EXTENT_DIRTY | EXTENT_DELALLOC | 960 + EXTENT_DO_ACCOUNTING, 0, 0, 961 961 NULL, mask); 962 962 } 963 963 ··· 1403 1401 int extent_clear_unlock_delalloc(struct inode *inode, 1404 1402 struct extent_io_tree *tree, 1405 1403 u64 start, u64 end, struct page *locked_page, 1406 - int unlock_pages, 1407 - int clear_unlock, 1408 - int clear_delalloc, int clear_dirty, 1409 - int set_writeback, 1410 - int end_writeback, 1411 - int set_private2) 1404 + unsigned long op) 1412 1405 { 1413 1406 int ret; 1414 1407 struct page *pages[16]; ··· 1413 1416 int i; 1414 1417 int clear_bits = 0; 1415 1418 1416 - if (clear_unlock) 1419 + if (op & EXTENT_CLEAR_UNLOCK) 1417 1420 clear_bits |= EXTENT_LOCKED; 1418 - if (clear_dirty) 1421 + if (op & EXTENT_CLEAR_DIRTY) 1419 1422 clear_bits |= EXTENT_DIRTY; 1420 1423 1421 - if (clear_delalloc) 1424 + if (op & EXTENT_CLEAR_DELALLOC) 1422 1425 clear_bits |= EXTENT_DELALLOC; 1423 1426 1427 + if (op & EXTENT_CLEAR_ACCOUNTING) 1428 + clear_bits |= EXTENT_DO_ACCOUNTING; 1429 + 1424 1430 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1425 - if (!(unlock_pages || clear_dirty || set_writeback || end_writeback || 1426 - set_private2)) 1431 + if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 
1432 + EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | 1433 + EXTENT_SET_PRIVATE2))) 1427 1434 return 0; 1428 1435 1429 1436 while (nr_pages > 0) { ··· 1436 1435 nr_pages, ARRAY_SIZE(pages)), pages); 1437 1436 for (i = 0; i < ret; i++) { 1438 1437 1439 - if (set_private2) 1438 + if (op & EXTENT_SET_PRIVATE2) 1440 1439 SetPagePrivate2(pages[i]); 1441 1440 1442 1441 if (pages[i] == locked_page) { 1443 1442 page_cache_release(pages[i]); 1444 1443 continue; 1445 1444 } 1446 - if (clear_dirty) 1445 + if (op & EXTENT_CLEAR_DIRTY) 1447 1446 clear_page_dirty_for_io(pages[i]); 1448 - if (set_writeback) 1447 + if (op & EXTENT_SET_WRITEBACK) 1449 1448 set_page_writeback(pages[i]); 1450 - if (end_writeback) 1449 + if (op & EXTENT_END_WRITEBACK) 1451 1450 end_page_writeback(pages[i]); 1452 - if (unlock_pages) 1451 + if (op & EXTENT_CLEAR_UNLOCK_PAGE) 1453 1452 unlock_page(pages[i]); 1454 1453 page_cache_release(pages[i]); 1455 1454 } ··· 2715 2714 lock_extent(tree, start, end, GFP_NOFS); 2716 2715 wait_on_page_writeback(page); 2717 2716 clear_extent_bit(tree, start, end, 2718 - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2717 + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 2718 + EXTENT_DO_ACCOUNTING, 2719 2719 1, 1, NULL, GFP_NOFS); 2720 2720 return 0; 2721 2721 }
+12 -6
fs/btrfs/extent_io.h
··· 15 15 #define EXTENT_BUFFER_FILLED (1 << 8) 16 16 #define EXTENT_BOUNDARY (1 << 9) 17 17 #define EXTENT_NODATASUM (1 << 10) 18 + #define EXTENT_DO_ACCOUNTING (1 << 11) 18 19 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 19 20 20 21 /* flags for bio submission */ ··· 25 24 #define EXTENT_BUFFER_UPTODATE 0 26 25 #define EXTENT_BUFFER_BLOCKING 1 27 26 #define EXTENT_BUFFER_DIRTY 2 27 + 28 + /* these are flags for extent_clear_unlock_delalloc */ 29 + #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 30 + #define EXTENT_CLEAR_UNLOCK 0x2 31 + #define EXTENT_CLEAR_DELALLOC 0x4 32 + #define EXTENT_CLEAR_DIRTY 0x8 33 + #define EXTENT_SET_WRITEBACK 0x10 34 + #define EXTENT_END_WRITEBACK 0x20 35 + #define EXTENT_SET_PRIVATE2 0x40 36 + #define EXTENT_CLEAR_ACCOUNTING 0x80 28 37 29 38 /* 30 39 * page->private values. Every page that is controlled by the extent ··· 299 288 int extent_clear_unlock_delalloc(struct inode *inode, 300 289 struct extent_io_tree *tree, 301 290 u64 start, u64 end, struct page *locked_page, 302 - int unlock_page, 303 - int clear_unlock, 304 - int clear_delalloc, int clear_dirty, 305 - int set_writeback, 306 - int end_writeback, 307 - int set_private2); 291 + unsigned long op); 308 292 #endif
+2 -1
fs/btrfs/file.c
··· 878 878 btrfs_put_ordered_extent(ordered); 879 879 880 880 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, 881 - last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 881 + last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 882 + EXTENT_DO_ACCOUNTING, 882 883 GFP_NOFS); 883 884 unlock_extent(&BTRFS_I(inode)->io_tree, 884 885 start_pos, last_pos - 1, GFP_NOFS);
+74 -37
fs/btrfs/inode.c
··· 424 424 * and free up our temp pages. 425 425 */ 426 426 extent_clear_unlock_delalloc(inode, 427 - &BTRFS_I(inode)->io_tree, 428 - start, end, NULL, 1, 0, 429 - 0, 1, 1, 1, 0); 427 + &BTRFS_I(inode)->io_tree, 428 + start, end, NULL, 429 + EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 430 + EXTENT_CLEAR_DELALLOC | 431 + EXTENT_CLEAR_ACCOUNTING | 432 + EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 430 433 ret = 0; 431 434 goto free_pages_out; 432 435 } ··· 640 637 * clear dirty, set writeback and unlock the pages. 641 638 */ 642 639 extent_clear_unlock_delalloc(inode, 643 - &BTRFS_I(inode)->io_tree, 644 - async_extent->start, 645 - async_extent->start + 646 - async_extent->ram_size - 1, 647 - NULL, 1, 1, 0, 1, 1, 0, 0); 640 + &BTRFS_I(inode)->io_tree, 641 + async_extent->start, 642 + async_extent->start + 643 + async_extent->ram_size - 1, 644 + NULL, EXTENT_CLEAR_UNLOCK_PAGE | 645 + EXTENT_CLEAR_UNLOCK | 646 + EXTENT_CLEAR_DELALLOC | 647 + EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); 648 648 649 649 ret = btrfs_submit_compressed_write(inode, 650 650 async_extent->start, ··· 718 712 start, end, 0, NULL); 719 713 if (ret == 0) { 720 714 extent_clear_unlock_delalloc(inode, 721 - &BTRFS_I(inode)->io_tree, 722 - start, end, NULL, 1, 1, 723 - 1, 1, 1, 1, 0); 715 + &BTRFS_I(inode)->io_tree, 716 + start, end, NULL, 717 + EXTENT_CLEAR_UNLOCK_PAGE | 718 + EXTENT_CLEAR_UNLOCK | 719 + EXTENT_CLEAR_DELALLOC | 720 + EXTENT_CLEAR_ACCOUNTING | 721 + EXTENT_CLEAR_DIRTY | 722 + EXTENT_SET_WRITEBACK | 723 + EXTENT_END_WRITEBACK); 724 724 *nr_written = *nr_written + 725 725 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 726 726 *page_started = 1; ··· 750 738 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 751 739 752 740 while (disk_num_bytes > 0) { 741 + unsigned long op; 742 + 753 743 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 754 744 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 755 745 root->sectorsize, 0, alloc_hint, ··· 803 
789 * Do set the Private2 bit so we know this page was properly 804 790 * setup for writepage 805 791 */ 792 + op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; 793 + op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 794 + EXTENT_SET_PRIVATE2; 795 + 806 796 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 807 797 start, start + ram_size - 1, 808 - locked_page, unlock, 1, 809 - 1, 0, 0, 0, 1); 798 + locked_page, op); 810 799 disk_num_bytes -= cur_alloc_size; 811 800 num_bytes -= cur_alloc_size; 812 801 alloc_hint = ins.objectid + ins.offset; ··· 881 864 u64 cur_end; 882 865 int limit = 10 * 1024 * 1042; 883 866 884 - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | 885 - EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS); 867 + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, 868 + 1, 0, NULL, GFP_NOFS); 886 869 while (start < end) { 887 870 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 888 871 async_cow->inode = inode; ··· 1023 1006 1024 1007 if (found_key.offset > cur_offset) { 1025 1008 extent_end = found_key.offset; 1009 + extent_type = 0; 1026 1010 goto out_check; 1027 1011 } 1028 1012 ··· 1130 1112 BUG_ON(ret); 1131 1113 1132 1114 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1133 - cur_offset, cur_offset + num_bytes - 1, 1134 - locked_page, 1, 1, 1, 0, 0, 0, 1); 1115 + cur_offset, cur_offset + num_bytes - 1, 1116 + locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1117 + EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 1118 + EXTENT_SET_PRIVATE2); 1135 1119 cur_offset = extent_end; 1136 1120 if (cur_offset > end) 1137 1121 break; ··· 1198 1178 root->fs_info->max_extent); 1199 1179 1200 1180 /* 1201 - * if we break a large extent up then leave delalloc_extents be, 1202 - * since we've already accounted for the large extent. 1181 + * if we break a large extent up then leave oustanding_extents 1182 + * be, since we've already accounted for the large extent. 
1203 1183 */ 1204 1184 if (div64_u64(new_size + root->fs_info->max_extent - 1, 1205 1185 root->fs_info->max_extent) < num_extents) 1206 1186 return 0; 1207 1187 } 1208 1188 1209 - BTRFS_I(inode)->delalloc_extents++; 1189 + spin_lock(&BTRFS_I(inode)->accounting_lock); 1190 + BTRFS_I(inode)->outstanding_extents++; 1191 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 1210 1192 1211 1193 return 0; 1212 1194 } ··· 1239 1217 1240 1218 /* we're not bigger than the max, unreserve the space and go */ 1241 1219 if (new_size <= root->fs_info->max_extent) { 1242 - BTRFS_I(inode)->delalloc_extents--; 1220 + spin_lock(&BTRFS_I(inode)->accounting_lock); 1221 + BTRFS_I(inode)->outstanding_extents--; 1222 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 1243 1223 return 0; 1244 1224 } 1245 1225 ··· 1255 1231 root->fs_info->max_extent) > num_extents) 1256 1232 return 0; 1257 1233 1258 - BTRFS_I(inode)->delalloc_extents--; 1234 + spin_lock(&BTRFS_I(inode)->accounting_lock); 1235 + BTRFS_I(inode)->outstanding_extents--; 1236 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 1259 1237 1260 1238 return 0; 1261 1239 } ··· 1279 1253 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1280 1254 struct btrfs_root *root = BTRFS_I(inode)->root; 1281 1255 1282 - BTRFS_I(inode)->delalloc_extents++; 1256 + spin_lock(&BTRFS_I(inode)->accounting_lock); 1257 + BTRFS_I(inode)->outstanding_extents++; 1258 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 1283 1259 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1284 1260 spin_lock(&root->fs_info->delalloc_lock); 1285 1261 BTRFS_I(inode)->delalloc_bytes += end - start + 1; ··· 1309 1281 if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1310 1282 struct btrfs_root *root = BTRFS_I(inode)->root; 1311 1283 1312 - BTRFS_I(inode)->delalloc_extents--; 1313 - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1284 + if (bits & EXTENT_DO_ACCOUNTING) { 1285 + spin_lock(&BTRFS_I(inode)->accounting_lock); 1286 + 
BTRFS_I(inode)->outstanding_extents--; 1287 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 1288 + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1289 + } 1314 1290 1315 1291 spin_lock(&root->fs_info->delalloc_lock); 1316 1292 if (state->end - state->start + 1 > ··· 3630 3598 { 3631 3599 struct btrfs_root *root; 3632 3600 3633 - if (!dentry->d_inode) 3634 - return 0; 3601 + if (!dentry->d_inode && !IS_ROOT(dentry)) 3602 + dentry = dentry->d_parent; 3635 3603 3636 - root = BTRFS_I(dentry->d_inode)->root; 3637 - if (btrfs_root_refs(&root->root_item) == 0) 3638 - return 1; 3604 + if (dentry->d_inode) { 3605 + root = BTRFS_I(dentry->d_inode)->root; 3606 + if (btrfs_root_refs(&root->root_item) == 0) 3607 + return 1; 3608 + } 3639 3609 return 0; 3640 3610 } 3641 3611 ··· 4842 4808 */ 4843 4809 clear_extent_bit(tree, page_start, page_end, 4844 4810 EXTENT_DIRTY | EXTENT_DELALLOC | 4845 - EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); 4811 + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 4812 + NULL, GFP_NOFS); 4846 4813 /* 4847 4814 * whoever cleared the private bit is responsible 4848 4815 * for the finish_ordered_io ··· 4856 4821 lock_extent(tree, page_start, page_end, GFP_NOFS); 4857 4822 } 4858 4823 clear_extent_bit(tree, page_start, page_end, 4859 - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 4860 - 1, 1, NULL, GFP_NOFS); 4824 + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 4825 + EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 4861 4826 __btrfs_releasepage(page, GFP_NOFS); 4862 4827 4863 4828 ClearPageChecked(page); ··· 4952 4917 * prepare_pages in the normal write path. 
4953 4918 */ 4954 4919 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 4955 - EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); 4920 + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 4921 + GFP_NOFS); 4956 4922 4957 4923 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 4958 4924 if (ret) { ··· 5101 5065 return NULL; 5102 5066 ei->last_trans = 0; 5103 5067 ei->logged_trans = 0; 5104 - ei->delalloc_extents = 0; 5105 - ei->delalloc_reserved_extents = 0; 5068 + ei->outstanding_extents = 0; 5069 + ei->reserved_extents = 0; 5070 + spin_lock_init(&ei->accounting_lock); 5106 5071 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 5107 5072 INIT_LIST_HEAD(&ei->i_orphan); 5108 5073 INIT_LIST_HEAD(&ei->ordered_operations); ··· 5842 5805 .removexattr = btrfs_removexattr, 5843 5806 }; 5844 5807 5845 - struct dentry_operations btrfs_dentry_operations = { 5808 + const struct dentry_operations btrfs_dentry_operations = { 5846 5809 .d_delete = btrfs_dentry_delete, 5847 5810 };
+5 -2
fs/btrfs/ioctl.c
··· 830 830 out_unlock: 831 831 mutex_unlock(&inode->i_mutex); 832 832 if (!err) { 833 + shrink_dcache_sb(root->fs_info->sb); 833 834 btrfs_invalidate_inodes(dest); 834 835 d_delete(dentry); 835 836 } ··· 1123 1122 datao += off - key.offset; 1124 1123 datal -= off - key.offset; 1125 1124 } 1126 - if (key.offset + datao + datal > off + len) 1127 - datal = off + len - key.offset - datao; 1125 + 1126 + if (key.offset + datal > off + len) 1127 + datal = off + len - key.offset; 1128 + 1128 1129 /* disko == 0 means it's a hole */ 1129 1130 if (!disko) 1130 1131 datao = 0;
+6
fs/btrfs/ordered-data.c
··· 306 306 tree->last = NULL; 307 307 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 308 308 309 + spin_lock(&BTRFS_I(inode)->accounting_lock); 310 + BTRFS_I(inode)->outstanding_extents--; 311 + spin_unlock(&BTRFS_I(inode)->accounting_lock); 312 + btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, 313 + inode, 1); 314 + 309 315 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 310 316 list_del_init(&entry->root_extent_list); 311 317
+2 -2
fs/btrfs/relocation.c
··· 3518 3518 BUG_ON(!rc->block_group); 3519 3519 3520 3520 btrfs_init_workers(&rc->workers, "relocate", 3521 - fs_info->thread_pool_size); 3521 + fs_info->thread_pool_size, NULL); 3522 3522 3523 3523 rc->extent_root = extent_root; 3524 3524 btrfs_prepare_block_group_relocation(extent_root, rc->block_group); ··· 3701 3701 mapping_tree_init(&rc->reloc_root_tree); 3702 3702 INIT_LIST_HEAD(&rc->reloc_roots); 3703 3703 btrfs_init_workers(&rc->workers, "relocate", 3704 - root->fs_info->thread_pool_size); 3704 + root->fs_info->thread_pool_size, NULL); 3705 3705 rc->extent_root = root->fs_info->extent_root; 3706 3706 3707 3707 set_reloc_control(rc);
+11 -1
fs/btrfs/tree-log.c
··· 137 137 138 138 mutex_lock(&root->log_mutex); 139 139 if (root->log_root) { 140 + if (!root->log_start_pid) { 141 + root->log_start_pid = current->pid; 142 + root->log_multiple_pids = false; 143 + } else if (root->log_start_pid != current->pid) { 144 + root->log_multiple_pids = true; 145 + } 146 + 140 147 root->log_batch++; 141 148 atomic_inc(&root->log_writers); 142 149 mutex_unlock(&root->log_mutex); 143 150 return 0; 144 151 } 152 + root->log_multiple_pids = false; 153 + root->log_start_pid = current->pid; 145 154 mutex_lock(&root->fs_info->tree_log_mutex); 146 155 if (!root->fs_info->log_root_tree) { 147 156 ret = btrfs_init_log_root_tree(trans, root->fs_info); ··· 1994 1985 if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 1995 1986 wait_log_commit(trans, root, root->log_transid - 1); 1996 1987 1997 - while (1) { 1988 + while (root->log_multiple_pids) { 1998 1989 unsigned long batch = root->log_batch; 1999 1990 mutex_unlock(&root->log_mutex); 2000 1991 schedule_timeout_uninterruptible(1); ··· 2020 2011 root->log_batch = 0; 2021 2012 root->log_transid++; 2022 2013 log->log_transid = root->log_transid; 2014 + root->log_start_pid = 0; 2023 2015 smp_mb(); 2024 2016 /* 2025 2017 * log tree has been flushed to disk, new modifications of