Btrfs: fix free space cache when there are pinned extents and clusters V2

I noticed a huge problem with the free space cache that was presenting
as an early ENOSPC. It turns out that when writing the free space cache
out I forgot to take into account pinned extents and, more importantly,
clusters. This would result in us leaking free space every time we
unmounted the filesystem and remounted it.

I fix this by checking whether the current block group has a cluster
and writing out any entries in that cluster to the cache, as well as
writing out any pinned extents we currently have, since that space will
be available for us to use the next time the fs mounts.
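
In short, the pinned-extent part of the write-out walks the still-pinned
extent_io tree and clips every hit to the block group before emitting a
cache entry. A condensed sketch of that loop (names come from the patch
below, except bg_end, which is shorthand not in the patch; error
handling and the page-full bookkeeping are dropped):

	/*
	 * Condensed sketch: emit each pinned extent that overlaps this
	 * block group as a free space entry, clipped to the block
	 * group's boundaries.
	 */
	u64 start = block_group->key.objectid;
	u64 bg_end = block_group->key.objectid + block_group->key.offset;
	u64 end, len;

	while (start < bg_end) {
		if (find_first_extent_bit(unpin, start, &start, &end,
					  EXTENT_DIRTY))
			break;		/* no more pinned extents at all */
		if (start >= bg_end)
			break;		/* next pinned extent is past us */
		len = min(bg_end - start, end + 1 - start);
		entry->offset = cpu_to_le64(start);
		entry->bytes = cpu_to_le64(len);
		entry->type = BTRFS_FREE_SPACE_EXTENT;
		entry++;
		start = end + 1;
	}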

This patch also adds a check to the end of load_free_space_cache to
make sure we loaded the right amount of free space, and if not, to
clear the cache and re-cache the old-fashioned way.
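
The check itself is simple accounting: every byte of a block group is
either used, free, or reserved for superblock mirrors, so the cached
free space total has to match. Condensed from the load_free_space_cache
hunk below:

	u64 used = btrfs_block_group_used(&block_group->item);

	spin_lock(&block_group->tree_lock);
	if (block_group->free_space !=
	    (block_group->key.offset - used - block_group->bytes_super)) {
		/* cached totals don't add up: dump the cache, re-scan */
		spin_unlock(&block_group->tree_lock);
		ret = 0;
		goto free_cache;
	}
	spin_unlock(&block_group->tree_lock);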

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>


+78 -4
fs/btrfs/free-space-cache.c
···
 #include "free-space-cache.h"
 #include "transaction.h"
 #include "disk-io.h"
+#include "extent_io.h"

 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
···
 	u64 num_entries;
 	u64 num_bitmaps;
 	u64 generation;
+	u64 used = btrfs_block_group_used(&block_group->item);
 	u32 cur_crc = ~(u32)0;
 	pgoff_t index = 0;
 	unsigned long first_page_offset;
···
 		index++;
 	}

+	spin_lock(&block_group->tree_lock);
+	if (block_group->free_space != (block_group->key.offset - used -
+					block_group->bytes_super)) {
+		spin_unlock(&block_group->tree_lock);
+		printk(KERN_ERR "block group %llu has an wrong amount of free "
+		       "space\n", block_group->key.objectid);
+		ret = 0;
+		goto free_cache;
+	}
+	spin_unlock(&block_group->tree_lock);
+
 	ret = 1;
 out:
 	kfree(checksums);
···
 	struct list_head *pos, *n;
 	struct page *page;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_free_cluster *cluster = NULL;
+	struct extent_io_tree *unpin = NULL;
 	struct list_head bitmap_list;
 	struct btrfs_key key;
+	u64 start, end, len;
 	u64 bytes = 0;
 	u32 *crc, *checksums;
 	pgoff_t index = 0, last_index = 0;
···
 	int entries = 0;
 	int bitmaps = 0;
 	int ret = 0;
+	bool next_page = false;

 	root = root->fs_info->tree_root;

···
 	 */
 	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

+	/* Get the cluster for this block_group if it exists */
+	if (!list_empty(&block_group->cluster_list))
+		cluster = list_entry(block_group->cluster_list.next,
+				     struct btrfs_free_cluster,
+				     block_group_list);
+
+	/*
+	 * We shouldn't have switched the pinned extents yet so this is the
+	 * right one
+	 */
+	unpin = root->fs_info->pinned_extents;
+
 	/*
 	 * Lock all pages first so we can lock the extent safely.
 	 *
···
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state, GFP_NOFS);

+	/*
+	 * When searching for pinned extents, we need to start at our start
+	 * offset.
+	 */
+	start = block_group->key.objectid;
+
 	/* Write out the extent entries */
 	do {
 		struct btrfs_free_space_entry *entry;
 		void *addr;
 		unsigned long offset = 0;
 		unsigned long start_offset = 0;
+
+		next_page = false;

 		if (index == 0) {
 			start_offset = first_page_offset;
···
 		entry = addr + start_offset;

 		memset(addr, 0, PAGE_CACHE_SIZE);
-		while (1) {
+		while (node && !next_page) {
 			struct btrfs_free_space *e;

 			e = rb_entry(node, struct btrfs_free_space, offset_index);
···
 				entry->type = BTRFS_FREE_SPACE_EXTENT;
 			}
 			node = rb_next(node);
-			if (!node)
-				break;
+			if (!node && cluster) {
+				node = rb_first(&cluster->root);
+				cluster = NULL;
+			}
 			offset += sizeof(struct btrfs_free_space_entry);
 			if (offset + sizeof(struct btrfs_free_space_entry) >=
 			    PAGE_CACHE_SIZE)
+				next_page = true;
+			entry++;
+		}
+
+		/*
+		 * We want to add any pinned extents to our free space cache
+		 * so we don't leak the space
+		 */
+		while (!next_page && (start < block_group->key.objectid +
+		       block_group->key.offset)) {
+			ret = find_first_extent_bit(unpin, start, &start, &end,
+						    EXTENT_DIRTY);
+			if (ret) {
+				ret = 0;
 				break;
+			}
+
+			/* This pinned extent is out of our range */
+			if (start >= block_group->key.objectid +
+			    block_group->key.offset)
+				break;
+
+			len = block_group->key.objectid +
+			      block_group->key.offset - start;
+			len = min(len, end + 1 - start);
+
+			entries++;
+			entry->offset = cpu_to_le64(start);
+			entry->bytes = cpu_to_le64(len);
+			entry->type = BTRFS_FREE_SPACE_EXTENT;
+
+			start = end + 1;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				next_page = true;
 			entry++;
 		}
 		*crc = ~(u32)0;
···
 		page_cache_release(page);

 		index++;
-	} while (node);
+	} while (node || next_page);

 	/* Write out the bitmaps */
 	list_for_each_safe(pos, n, &bitmap_list) {