Merge branch 'for-linus-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"Some fixes that Dave Sterba collected.

We've been hitting an early ENOSPC problem on production machines that
Omar tracked down to an old int->u64 mistake. I waited a bit on this
pull to make sure it was really the problem from production, but it's
been on ~2100 hosts now and I think we're good.

Omar also noticed a commit in the queue would cause new early ENOSPC
problems. I pulled that one out for now, which is why the top three
commits are younger than the rest.

Otherwise these are all fixes, some of which explain very old bugs
that we've been poking at for a while"

* 'for-linus-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: fix delalloc accounting leak caused by u32 overflow
Btrfs: clear EXTENT_DEFRAG bits in finish_ordered_io
btrfs: tree-log.c: Wrong printk information about namelen
btrfs: fix race with relocation recovery and fs_root setup
btrfs: fix memory leak in update_space_info failure path
btrfs: use correct types for page indices in btrfs_page_exists_in_range
btrfs: fix incorrect error return ret being passed to mapping_set_error
btrfs: Make flush bios explicitely sync
btrfs: fiemap: Cache and merge fiemap extent before submit it to user

 fs/btrfs/ctree.h       |   4 ++--
 fs/btrfs/dir-item.c    |   2 +-
 fs/btrfs/disk-io.c     |  10 ++++++----
 fs/btrfs/extent-tree.c |   7 ++++---
 fs/btrfs/extent_io.c   | 126 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/inode.c       |   6 +++---
 6 files changed, 139 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2563,7 +2563,7 @@
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
 						 unsigned num_items)
 {
-	return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
+	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
 }
 
 /*
@@ -2573,7 +2573,7 @@
 static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
 						 unsigned num_items)
 {
-	return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
+	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
 }
 
 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
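Worth spelling out why the cast fixes things: fs_info->nodesize is a u32, so
the whole product is evaluated in 32-bit arithmetic and only widened to the
u64 return type afterwards, once the damage is done. With 64KiB nodes the
intermediate value wraps as soon as num_items is large enough. A minimal
userspace sketch of the same arithmetic (BTRFS_MAX_LEVEL matches the kernel's
value of 8; the other numbers are illustrative):

#include <stdint.h>
#include <stdio.h>

#define BTRFS_MAX_LEVEL 8	/* same value as the kernel definition */

int main(void)
{
	uint32_t nodesize = 65536;	/* 64KiB metadata nodes */
	unsigned num_items = 4096;	/* a large reservation */

	/* Old code: the product wraps in 32 bits before widening. */
	uint64_t buggy = nodesize * BTRFS_MAX_LEVEL * 2 * num_items;

	/* Fixed code: cast first so all multiplies happen in 64 bits. */
	uint64_t fixed = (uint64_t)nodesize * BTRFS_MAX_LEVEL * 2 * num_items;

	printf("buggy: %llu\n", (unsigned long long)buggy);	/* 0 */
	printf("fixed: %llu\n", (unsigned long long)fixed);	/* 4294967296 */
	return 0;
}

Here 65536 * 8 * 2 * 4096 is exactly 2^32, so the 32-bit product wraps to
zero; under-reservations like this are the kind of thing that surfaces as
early ENOSPC.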
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -468,7 +468,7 @@
 
 	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
 		btrfs_crit(fs_info, "invalid dir item name len: %u",
-		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		       (unsigned)btrfs_dir_name_len(leaf, dir_item));
 		return 1;
 	}
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3467,10 +3467,12 @@
 		 * we fua the first super.  The others we allow
 		 * to go down lazy.
 		 */
-		if (i == 0)
-			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
-		else
+		if (i == 0) {
+			ret = btrfsic_submit_bh(REQ_OP_WRITE,
+				REQ_SYNC | REQ_FUA, bh);
+		} else {
 			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+		}
 		if (ret)
 			errors++;
 	}
@@ -3537,7 +3539,7 @@
 
 	bio->bi_end_io = btrfs_end_empty_barrier;
 	bio->bi_bdev = device->bdev;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
 	init_completion(&device->flush_wait);
 	bio->bi_private = &device->flush_wait;
 	device->flush_bio = bio;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3993,6 +3993,7 @@
 					  info->space_info_kobj, "%s",
 					  alloc_name(found->flags));
 	if (ret) {
+		percpu_counter_destroy(&found->total_bytes_pinned);
 		kfree(found);
 		return ret;
 	}
@@ -4845,7 +4846,7 @@
 	spin_unlock(&delayed_rsv->lock);
 
 commit:
-	trans = btrfs_join_transaction(fs_info->fs_root);
+	trans = btrfs_join_transaction(fs_info->extent_root);
 	if (IS_ERR(trans))
 		return -ENOSPC;
 
@@ -4863,7 +4864,7 @@
 				  struct btrfs_space_info *space_info, u64 num_bytes,
 				  u64 orig_bytes, int state)
 {
-	struct btrfs_root *root = fs_info->fs_root;
+	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_trans_handle *trans;
 	int nr;
 	int ret = 0;
@@ -5063,7 +5064,7 @@
 	int flush_state = FLUSH_DELAYED_ITEMS_NR;
 
 	spin_lock(&space_info->lock);
-	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
 						      space_info);
 	if (!to_reclaim) {
 		spin_unlock(&space_info->lock);
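The first hunk plugs a leak on the registration failure path:
found->total_bytes_pinned is a percpu counter, and freeing the struct
without percpu_counter_destroy() leaks the per-CPU allocation behind it.
The shape of the fix is the standard unwind-in-reverse error path. A
self-contained userspace sketch of that pattern (every name here is an
illustrative stand-in, not a kernel API):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a struct with a separately allocated member, like the
 * percpu counter inside btrfs_space_info. */
struct space_info {
	long *counter;
};

static int init_counter(struct space_info *s)
{
	s->counter = calloc(1, sizeof(*s->counter));
	return s->counter ? 0 : -ENOMEM;
}

static void destroy_counter(struct space_info *s)
{
	free(s->counter);
}

static int register_object(struct space_info *s)
{
	(void)s;
	return -EINVAL;		/* simulate the registration failing */
}

static int update_space_info_sketch(void)
{
	struct space_info *found = calloc(1, sizeof(*found));
	int ret;

	if (!found)
		return -ENOMEM;

	ret = init_counter(found);
	if (ret) {
		free(found);
		return ret;
	}

	ret = register_object(found);
	if (ret) {
		destroy_counter(found);	/* the call the old path missed */
		free(found);
		return ret;
	}
	return 0;
}

int main(void)
{
	printf("ret = %d\n", update_space_info_sketch());
	return 0;
}

Each setup step gets undone in reverse order; skipping any one of them on
the way out is exactly the kind of leak the fix addresses.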
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2458,7 +2458,7 @@
 	if (!uptodate) {
 		ClearPageUptodate(page);
 		SetPageError(page);
-		ret = ret < 0 ? ret : -EIO;
+		ret = err < 0 ? err : -EIO;
 		mapping_set_error(page->mapping, ret);
 	}
 }
@@ -4377,6 +4377,123 @@
 		return NULL;
 }
 
+/*
+ * To cache the previous fiemap extent
+ *
+ * Will be used for merging fiemap extents
+ */
+struct fiemap_cache {
+	u64 offset;
+	u64 phys;
+	u64 len;
+	u32 flags;
+	bool cached;
+};
+
+/*
+ * Helper to submit a fiemap extent.
+ *
+ * Will try to merge the current fiemap extent specified by @offset, @phys,
+ * @len and @flags with the cached one.
+ * Only when we fail to merge is the cached one submitted as a
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+			      struct fiemap_cache *cache,
+			      u64 offset, u64 phys, u64 len, u32 flags)
+{
+	int ret = 0;
+
+	if (!cache->cached)
+		goto assign;
+
+	/*
+	 * Sanity check, extent_fiemap() should have ensured that the new
+	 * fiemap extent won't overlap with the cached one.
+	 * Not recoverable.
+	 *
+	 * NOTE: Physical addresses can overlap, due to compression
+	 */
+	if (cache->offset + cache->len > offset) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	/*
+	 * Only merges fiemap extents if
+	 * 1) Their logical addresses are continuous
+	 *
+	 * 2) Their physical addresses are continuous
+	 *    So truly compressed (physical size smaller than logical size)
+	 *    extents won't get merged with each other
+	 *
+	 * 3) Share same flags except FIEMAP_EXTENT_LAST
+	 *    So regular extents won't get merged with prealloc extents
+	 */
+	if (cache->offset + cache->len == offset &&
+	    cache->phys + cache->len == phys &&
+	    (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+			(flags & ~FIEMAP_EXTENT_LAST)) {
+		cache->len += len;
+		cache->flags |= flags;
+		goto try_submit_last;
+	}
+
+	/* Not mergeable, need to submit the cached one */
+	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+				      cache->len, cache->flags);
+	cache->cached = false;
+	if (ret)
+		return ret;
+assign:
+	cache->cached = true;
+	cache->offset = offset;
+	cache->phys = phys;
+	cache->len = len;
+	cache->flags = flags;
+try_submit_last:
+	if (cache->flags & FIEMAP_EXTENT_LAST) {
+		ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+				cache->phys, cache->len, cache->flags);
+		cache->cached = false;
+	}
+	return ret;
+}
+
+/*
+ * Sanity check for the fiemap cache
+ *
+ * All cached fiemap extents should have been submitted by
+ * emit_fiemap_extent().
+ * Iteration should be terminated either by the last fiemap extent or
+ * fieinfo->fi_extents_max.
+ * So no cached fiemap extent should remain.
+ */
+static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
+			      struct fiemap_extent_info *fieinfo,
+			      struct fiemap_cache *cache)
+{
+	int ret;
+
+	if (!cache->cached)
+		return 0;
+
+	/* Small and recoverable problem, only to inform developers */
+#ifdef CONFIG_BTRFS_DEBUG
+	WARN_ON(1);
+#endif
+	btrfs_warn(fs_info,
+		   "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
+		   cache->offset, cache->phys, cache->len, cache->flags);
+	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+				      cache->len, cache->flags);
+	cache->cached = false;
+	if (ret > 0)
+		ret = 0;
+	return ret;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -4511,6 +4628,7 @@
 	struct extent_state *cached_state = NULL;
 	struct btrfs_path *path;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct fiemap_cache cache = { 0 };
 	int end = 0;
 	u64 em_start = 0;
 	u64 em_len = 0;
@@ -4691,8 +4809,8 @@
 			flags |= FIEMAP_EXTENT_LAST;
 			end = 1;
 		}
-		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-					      em_len, flags);
+		ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
+					 em_len, flags);
 		if (ret) {
 			if (ret == 1)
 				ret = 0;
@@ -4700,6 +4818,8 @@
 		}
 	}
 out_free:
+	if (!ret)
+		ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
 	free_extent_map(em);
 out:
 	btrfs_free_path(path);
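To make the merge rules concrete: two fiemap records are combined only when
their logical ranges abut, their physical ranges abut, and their flags agree
apart from FIEMAP_EXTENT_LAST. Here is a standalone sketch of just that
predicate, using plain structs instead of the kernel types
(FIEMAP_EXTENT_LAST carries its linux/fiemap.h value; the rest is
illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIEMAP_EXTENT_LAST 0x00000001	/* same value as linux/fiemap.h */

/* Standalone stand-in for the kernel's struct fiemap_cache. */
struct cache {
	uint64_t offset, phys, len;
	uint32_t flags;
};

/* The merge predicate from emit_fiemap_extent(): logical ranges abut,
 * physical ranges abut, flags match apart from FIEMAP_EXTENT_LAST. */
static bool mergeable(const struct cache *c, uint64_t offset,
		      uint64_t phys, uint32_t flags)
{
	return c->offset + c->len == offset &&
	       c->phys + c->len == phys &&
	       (c->flags & ~FIEMAP_EXTENT_LAST) ==
	       (flags & ~FIEMAP_EXTENT_LAST);
}

int main(void)
{
	struct cache c = { .offset = 0, .phys = 1 << 20, .len = 4096 };

	/* Contiguous 4K extent right after the cached one: merges (1). */
	printf("%d\n", mergeable(&c, 4096, (1 << 20) + 4096, 0));
	/* Physical gap, e.g. a compressed extent: no merge (0). */
	printf("%d\n", mergeable(&c, 4096, (1 << 20) + 8192, 0));
	return 0;
}

The second case fails because the physical gap breaks rule 2, which is
exactly how truly compressed extents are kept from being merged away.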
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2952,7 +2952,7 @@
 
 	ret = test_range_bit(io_tree, ordered_extent->file_offset,
 			ordered_extent->file_offset + ordered_extent->len - 1,
-			EXTENT_DEFRAG, 1, cached_state);
+			EXTENT_DEFRAG, 0, cached_state);
 	if (ret) {
 		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
 		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
@@ -7483,8 +7483,8 @@
 	int found = false;
 	void **pagep = NULL;
 	struct page *page = NULL;
-	int start_idx;
-	int end_idx;
+	unsigned long start_idx;
+	unsigned long end_idx;
 
 	start_idx = start >> PAGE_SHIFT;
 
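The second hunk's type change is more than cosmetic: a page index is
computed as offset >> PAGE_SHIFT, and with 4KiB pages an int index
overflows for offsets at or above 2^31 pages * 4KiB = 8TiB, well within
the file sizes btrfs supports. A quick userspace check of the arithmetic
(assumes a 64-bit build, where unsigned long matches the kernel's pgoff_t):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* 4KiB pages, the common x86-64 configuration */

int main(void)
{
	uint64_t start = 9ULL << 40;	/* an offset inside a 9TiB file */

	int truncated = start >> PAGE_SHIFT;		/* old: int index */
	unsigned long full = start >> PAGE_SHIFT;	/* fixed: wide index */

	/* Converting the 64-bit result to int is implementation-defined
	 * once it exceeds INT_MAX; on common ABIs it wraps negative, so
	 * distinct offsets beyond 8TiB can collide on one page index. */
	printf("int index:           %d\n", truncated);
	printf("unsigned long index: %lu\n", full);
	return 0;
}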