Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: use a worker thread to do caching

A user reported a deadlock when copying a bunch of files. This is because they
were low on memory and kthreadd got hung up trying to migrate pages for an
allocation when starting the caching kthread. The page was locked by the person
starting the caching kthread. To fix this we just need to use the async thread
stuff so that the threads are already created and we don't have to worry about
deadlocks. Thanks,

Reported-by: Roman Mamedov <rm@romanrm.ru>
Signed-off-by: Josef Bacik <josef@redhat.com>

Authored by: Josef Bacik
Committed by: Chris Mason
Commit: bab39bf9 (parent: df98b6e2)

+27 -29
+3 -1
fs/btrfs/ctree.h
@@ -767,6 +767,5 @@
 	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
-	atomic_t caching_threads;
 	wait_queue_head_t wait;
 };
@@ -827,6 +828,7 @@
 	struct list_head list;
 	struct mutex mutex;
 	wait_queue_head_t wait;
+	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
 	atomic_t count;
@@ -1036,6 +1036,8 @@
 	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
+	struct btrfs_workers caching_workers;
+
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
 	 * the cow mechanism and make them safe to write. It happens
+6
fs/btrfs/disk-io.c
@@ -1807,6 +1807,9 @@
 			   fs_info->thread_pool_size),
 			   &fs_info->generic_worker);

+	btrfs_init_workers(&fs_info->caching_workers, "cache",
+			   2, &fs_info->generic_worker);
+
 	/* a higher idle thresh on the submit workers makes it much more
 	 * likely that bios will be send down in a sane order to the
 	 * devices
@@ -1863,6 +1860,7 @@
 	btrfs_start_workers(&fs_info->endio_write_workers, 1);
 	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
 	btrfs_start_workers(&fs_info->delayed_workers, 1);
+	btrfs_start_workers(&fs_info->caching_workers, 1);

 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2121,6 +2117,7 @@
 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 	kfree(fs_info->delayed_root);
 fail_iput:
@@ -2589,6 +2584,7 @@
 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);

 	btrfs_close_devices(fs_info->fs_devices);
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+18 -28
fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@
 	return total_added;
 }

-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-	struct btrfs_block_group_cache *block_group = data;
-	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_root *extent_root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -334,9 +334,14 @@
 	u32 nritems;
 	int ret = 0;

+	caching_ctl = container_of(work, struct btrfs_caching_control, work);
+	block_group = caching_ctl->block_group;
+	fs_info = block_group->fs_info;
+	extent_root = fs_info->extent_root;
+
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		goto out;

 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

@@ -438,11 +433,9 @@
 	free_excluded_extents(extent_root, block_group);

 	mutex_unlock(&caching_ctl->mutex);
+out:
 	wake_up(&caching_ctl->wait);

 	put_caching_control(caching_ctl);
-	atomic_dec(&block_group->space_info->caching_threads);
 	btrfs_put_block_group(block_group);
-
-	return 0;
 }

 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -452,7 +449,6 @@
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct task_struct *tsk;
 	int ret = 0;

 	smp_mb();
@@ -503,6 +501,7 @@
 	caching_ctl->progress = cache->key.objectid;
 	/* one for caching kthread, one for caching block group list */
 	atomic_set(&caching_ctl->count, 2);
+	caching_ctl->work.func = caching_thread;

 	spin_lock(&cache->lock);
 	if (cache->cached != BTRFS_CACHE_NO) {
@@ -519,16 +516,9 @@
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->extent_commit_sem);

-	atomic_inc(&cache->space_info->caching_threads);
 	btrfs_get_block_group(cache);

-	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-			  cache->key.objectid);
-	if (IS_ERR(tsk)) {
-		ret = PTR_ERR(tsk);
-		printk(KERN_ERR "error running thread %d\n", ret);
-		BUG();
-	}
+	btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);

 	return ret;
 }
@@ -2932,7 +2936,6 @@
 	init_waitqueue_head(&found->wait);
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
-	atomic_set(&found->caching_threads, 0);
 	return 0;
 }

@@ -4992,14 +4997,10 @@
 	}

 	/*
-	 * We only want to start kthread caching if we are at
-	 * the point where we will wait for caching to make
-	 * progress, or if our ideal search is over and we've
-	 * found somebody to start caching.
+	 * The caching workers are limited to 2 threads, so we
+	 * can queue as much work as we care to.
 	 */
-	if (loop > LOOP_CACHING_NOWAIT ||
-	    (loop > LOOP_FIND_IDEAL &&
-	     atomic_read(&space_info->caching_threads) < 2)) {
+	if (loop > LOOP_FIND_IDEAL) {
 		ret = cache_block_group(block_group, trans,
 					orig_root, 0);
 		BUG_ON(ret);
@@ -5217,8 +5226,7 @@
 	if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 		found_uncached_bg = false;
 		loop++;
-		if (!ideal_cache_percent &&
-		    atomic_read(&space_info->caching_threads))
+		if (!ideal_cache_percent)
 			goto search;

 		/*