Btrfs: fix free space cache when there are pinned extents and clusters V2

I noticed a huge problem with the free space cache that was presenting
as an early ENOSPC. It turns out that when writing the free space cache
out I forgot to take into account pinned extents and, more importantly,
clusters. This would result in us leaking free space every time we
unmounted the filesystem and remounted it.

I fix this by checking whether the current block group has a cluster
and, if so, writing out any entries in the cluster to the cache, as well
as writing out any pinned extents we currently have, since those will be
available for us to use the next time the fs mounts.
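
As a rough illustration of the clipping the new pinned-extent loop does
when it turns a pinned range into a cache entry, here is a minimal
user-space C sketch; the block group bounds and the pinned extent below
are made-up stand-ins for block_group->key.objectid/key.offset and the
[start, end] range returned by find_first_extent_bit():

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical block group: start (key.objectid) and length
         * (key.offset), plus one pinned extent [start, end], end
         * inclusive. */
        uint64_t bg_start = 1024ULL * 1024;
        uint64_t bg_len = 256ULL * 1024;
        uint64_t start = bg_start + 200ULL * 1024;
        uint64_t end = bg_start + 300ULL * 1024 - 1;

        /* Extents that begin past the block group are skipped. */
        if (start >= bg_start + bg_len) {
            puts("pinned extent out of range");
            return 0;
        }

        /* Clip the entry so it never spills past the block group
         * boundary, mirroring len = min(bg_end - start,
         * end + 1 - start) in the patch. */
        uint64_t len = bg_start + bg_len - start;
        if (end + 1 - start < len)
            len = end + 1 - start;

        printf("cache entry: offset=%llu bytes=%llu\n",
               (unsigned long long)start, (unsigned long long)len);
        return 0;
    }

Here the 100K extent extends 44K past the end of the block group, so
only the 56K that actually belongs to the group is written to the cache.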

This patch also adds a check to the end of load_free_space_cache to
make sure we loaded the right amount of free space, and if not, to clear
the cache and re-cache the old-fashioned way.
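
For intuition, this is the invariant the new check enforces, shown as a
minimal user-space C sketch; the parameter names and numbers are
hypothetical stand-ins for block_group->key.offset, the used amount from
the block group item, bytes_super, and the free space reconstructed from
the cache:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors: free_space == key.offset - used - bytes_super */
    static int cache_is_consistent(uint64_t total, uint64_t used,
                                   uint64_t bytes_super,
                                   uint64_t cached_free)
    {
        return cached_free == total - used - bytes_super;
    }

    int main(void)
    {
        uint64_t total = 1024ULL * 1024 * 1024; /* 1GiB block group */
        uint64_t used = 700ULL * 1024 * 1024;
        uint64_t bytes_super = 128ULL * 1024;
        uint64_t expected = total - used - bytes_super;

        /* A cache written without pinned extents comes up short,
         * fails the check, and the block group gets re-cached. */
        printf("leaky cache ok? %d\n",
               cache_is_consistent(total, used, bytes_super,
                                   expected - 4096));
        printf("exact cache ok? %d\n",
               cache_is_consistent(total, used, bytes_super, expected));
        return 0;
    }

A cache that is missing pinned or cluster space fails this comparison,
which is exactly the situation the rest of the patch prevents from
arising in the first place.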

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>


fs/btrfs/free-space-cache.c
1 file changed, 78 insertions(+), 4 deletions(-)
···
 #include "free-space-cache.h"
 #include "transaction.h"
 #include "disk-io.h"
+#include "extent_io.h"
 
 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
···
     u64 num_entries;
     u64 num_bitmaps;
     u64 generation;
+    u64 used = btrfs_block_group_used(&block_group->item);
     u32 cur_crc = ~(u32)0;
     pgoff_t index = 0;
     unsigned long first_page_offset;
···
         index++;
     }
 
+    spin_lock(&block_group->tree_lock);
+    if (block_group->free_space != (block_group->key.offset - used -
+        block_group->bytes_super)) {
+        spin_unlock(&block_group->tree_lock);
+        printk(KERN_ERR "block group %llu has the wrong amount of free "
+               "space\n", block_group->key.objectid);
+        ret = 0;
+        goto free_cache;
+    }
+    spin_unlock(&block_group->tree_lock);
+
     ret = 1;
 out:
     kfree(checksums);
···
     struct list_head *pos, *n;
     struct page *page;
     struct extent_state *cached_state = NULL;
+    struct btrfs_free_cluster *cluster = NULL;
+    struct extent_io_tree *unpin = NULL;
     struct list_head bitmap_list;
     struct btrfs_key key;
+    u64 start, end, len;
     u64 bytes = 0;
     u32 *crc, *checksums;
     pgoff_t index = 0, last_index = 0;
···
     int entries = 0;
     int bitmaps = 0;
     int ret = 0;
+    bool next_page = false;
 
     root = root->fs_info->tree_root;
···
      */
     first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
 
+    /* Get the cluster for this block_group if it exists */
+    if (!list_empty(&block_group->cluster_list))
+        cluster = list_entry(block_group->cluster_list.next,
+                             struct btrfs_free_cluster,
+                             block_group_list);
+
+    /*
+     * We shouldn't have switched the pinned extents yet so this is the
+     * right one
+     */
+    unpin = root->fs_info->pinned_extents;
+
     /*
      * Lock all pages first so we can lock the extent safely.
      *
···
     lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
                      0, &cached_state, GFP_NOFS);
 
+    /*
+     * When searching for pinned extents, we need to start at our start
+     * offset.
+     */
+    start = block_group->key.objectid;
+
     /* Write out the extent entries */
     do {
         struct btrfs_free_space_entry *entry;
         void *addr;
         unsigned long offset = 0;
         unsigned long start_offset = 0;
+
+        next_page = false;
 
         if (index == 0) {
             start_offset = first_page_offset;
···
         entry = addr + start_offset;
 
         memset(addr, 0, PAGE_CACHE_SIZE);
-        while (1) {
+        while (node && !next_page) {
             struct btrfs_free_space *e;
 
             e = rb_entry(node, struct btrfs_free_space, offset_index);
···
                 entry->type = BTRFS_FREE_SPACE_EXTENT;
             }
             node = rb_next(node);
-            if (!node)
-                break;
+            if (!node && cluster) {
+                node = rb_first(&cluster->root);
+                cluster = NULL;
+            }
             offset += sizeof(struct btrfs_free_space_entry);
             if (offset + sizeof(struct btrfs_free_space_entry) >=
                 PAGE_CACHE_SIZE)
+                next_page = true;
+            entry++;
+        }
+
+        /*
+         * We want to add any pinned extents to our free space cache
+         * so we don't leak the space
+         */
+        while (!next_page && (start < block_group->key.objectid +
+               block_group->key.offset)) {
+            ret = find_first_extent_bit(unpin, start, &start, &end,
+                                        EXTENT_DIRTY);
+            if (ret) {
+                ret = 0;
                 break;
+            }
+
+            /* This pinned extent is out of our range */
+            if (start >= block_group->key.objectid +
+                block_group->key.offset)
+                break;
+
+            len = block_group->key.objectid +
+                  block_group->key.offset - start;
+            len = min(len, end + 1 - start);
+
+            entries++;
+            entry->offset = cpu_to_le64(start);
+            entry->bytes = cpu_to_le64(len);
+            entry->type = BTRFS_FREE_SPACE_EXTENT;
+
+            start = end + 1;
+            offset += sizeof(struct btrfs_free_space_entry);
+            if (offset + sizeof(struct btrfs_free_space_entry) >=
+                PAGE_CACHE_SIZE)
+                next_page = true;
             entry++;
         }
         *crc = ~(u32)0;
···
         page_cache_release(page);
 
         index++;
-    } while (node);
+    } while (node || next_page);
 
     /* Write out the bitmaps */
     list_for_each_safe(pos, n, &bitmap_list) {