Merge tag 'for-6.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- fix infinite loop in readdir(), could happen in a big directory when
files get renamed during enumeration

- fix extent map handling of skipped pinned ranges

- fix a corner case when handling ordered extent length

- fix a potential crash when balance cancel races with pause

- verify correct uuid when starting scrub or device replace

* tag 'for-6.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: fix incorrect splitting in btrfs_drop_extent_map_range
btrfs: fix BUG_ON condition in btrfs_cancel_balance
btrfs: only subtract from len_to_oe_boundary when it is tracking an extent
btrfs: fix replace/scrub failure with metadata_uuid
btrfs: fix infinite directory reads

fs/btrfs/ctree.h (+1)
···
 struct btrfs_file_private {
 	void *filldir_buf;
+	u64 last_index;
 	struct extent_state *llseek_cached_state;
 };
fs/btrfs/delayed-inode.c (+3 -2)
···
 }
 
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+				     u64 last_index,
 				     struct list_head *ins_list,
 				     struct list_head *del_list)
 {
···
 	mutex_lock(&delayed_node->mutex);
 	item = __btrfs_first_delayed_insertion_item(delayed_node);
-	while (item) {
+	while (item && item->index <= last_index) {
 		refcount_inc(&item->refs);
 		list_add_tail(&item->readdir_list, ins_list);
 		item = __btrfs_next_delayed_item(item);
 	}
 
 	item = __btrfs_first_delayed_deletion_item(delayed_node);
-	while (item) {
+	while (item && item->index <= last_index) {
 		refcount_inc(&item->refs);
 		list_add_tail(&item->readdir_list, del_list);
 		item = __btrfs_next_delayed_item(item);
fs/btrfs/delayed-inode.h (+1)
···
 /* Used for readdir() */
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+				     u64 last_index,
 				     struct list_head *ins_list,
 				     struct list_head *del_list);
 void btrfs_readdir_put_delayed_items(struct inode *inode,
fs/btrfs/extent_io.c (+24 -1)
···
 		size -= len;
 		pg_offset += len;
 		disk_bytenr += len;
-		bio_ctrl->len_to_oe_boundary -= len;
+
+		/*
+		 * len_to_oe_boundary defaults to U32_MAX, which isn't page or
+		 * sector aligned.  alloc_new_bio() then sets it to the end of
+		 * our ordered extent for writes into zoned devices.
+		 *
+		 * When len_to_oe_boundary is tracking an ordered extent, we
+		 * trust the ordered extent code to align things properly, and
+		 * the check above to cap our write to the ordered extent
+		 * boundary is correct.
+		 *
+		 * When len_to_oe_boundary is U32_MAX, the cap above would
+		 * result in a 4095 byte IO for the last page right before
+		 * we hit the bio limit of UINT_MAX.  bio_add_page() has all
+		 * the checks required to make sure we don't overflow the bio,
+		 * and we should just ignore len_to_oe_boundary completely
+		 * unless we're using it to track an ordered extent.
+		 *
+		 * It's pretty hard to make a bio sized U32_MAX, but it can
+		 * happen when the page cache is able to feed us contiguous
+		 * pages for large extents.
+		 */
+		if (bio_ctrl->len_to_oe_boundary != U32_MAX)
+			bio_ctrl->len_to_oe_boundary -= len;
 
 		/* Ordered extent boundary: move on to a new bio. */
 		if (bio_ctrl->len_to_oe_boundary == 0)
fs/btrfs/extent_map.c (+2 -4)
···
 	if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
 		start = em_end;
-		if (end != (u64)-1)
-			len = start + len - em_end;
 		goto next;
 	}
···
 		if (!split)
 			goto remove_em;
 	}
-	split->start = start + len;
-	split->len = em_end - (start + len);
+	split->start = end;
+	split->len = em_end - end;
 	split->block_start = em->block_start;
 	split->flags = flags;
 	split->compress_type = em->compress_type;
fs/btrfs/inode.c (+79 -52)
···
 }
 
 /*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+	struct btrfs_root *root = inode->root;
+	struct btrfs_key key, found_key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	int ret;
+
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = (u64)-1;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	/* FIXME: we should be able to handle this */
+	if (ret == 0)
+		goto out;
+	ret = 0;
+
+	if (path->slots[0] == 0) {
+		inode->index_cnt = BTRFS_DIR_START_INDEX;
+		goto out;
+	}
+
+	path->slots[0]--;
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+	if (found_key.objectid != btrfs_ino(inode) ||
+	    found_key.type != BTRFS_DIR_INDEX_KEY) {
+		inode->index_cnt = BTRFS_DIR_START_INDEX;
+		goto out;
+	}
+
+	inode->index_cnt = found_key.offset + 1;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+	if (dir->index_cnt == (u64)-1) {
+		int ret;
+
+		ret = btrfs_inode_delayed_dir_index_count(dir);
+		if (ret) {
+			ret = btrfs_set_inode_index_count(dir);
+			if (ret)
+				return ret;
+		}
+	}
+
+	*index = dir->index_cnt;
+
+	return 0;
+}
+
+/*
  * All this infrastructure exists because dir_emit can fault, and we are holding
  * the tree lock when doing readdir.  For now just allocate a buffer and copy
  * our information into that, and then dir_emit from the buffer.  This is
···
 static int btrfs_opendir(struct inode *inode, struct file *file)
 {
 	struct btrfs_file_private *private;
+	u64 last_index;
+	int ret;
+
+	ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+	if (ret)
+		return ret;
 
 	private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
 	if (!private)
 		return -ENOMEM;
+	private->last_index = last_index;
 	private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!private->filldir_buf) {
 		kfree(private);
···
 	INIT_LIST_HEAD(&ins_list);
 	INIT_LIST_HEAD(&del_list);
-	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+	put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+					      &ins_list, &del_list);
 
 again:
 	key.type = BTRFS_DIR_INDEX_KEY;
···
 			break;
 		if (found_key.offset < ctx->pos)
 			continue;
+		if (found_key.offset > private->last_index)
+			break;
 		if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
 			continue;
 		di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
···
 	if (flags & S_ATIME)
 		inode->i_atime = *now;
 	return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0;
-}
-
-/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
-	struct btrfs_root *root = inode->root;
-	struct btrfs_key key, found_key;
-	struct btrfs_path *path;
-	struct extent_buffer *leaf;
-	int ret;
-
-	key.objectid = btrfs_ino(inode);
-	key.type = BTRFS_DIR_INDEX_KEY;
-	key.offset = (u64)-1;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	if (ret < 0)
-		goto out;
-	/* FIXME: we should be able to handle this */
-	if (ret == 0)
-		goto out;
-	ret = 0;
-
-	if (path->slots[0] == 0) {
-		inode->index_cnt = BTRFS_DIR_START_INDEX;
-		goto out;
-	}
-
-	path->slots[0]--;
-
-	leaf = path->nodes[0];
-	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
-	if (found_key.objectid != btrfs_ino(inode) ||
-	    found_key.type != BTRFS_DIR_INDEX_KEY) {
-		inode->index_cnt = BTRFS_DIR_START_INDEX;
-		goto out;
-	}
-
-	inode->index_cnt = found_key.offset + 1;
-out:
-	btrfs_free_path(path);
-	return ret;
 }
 
 /*
fs/btrfs/scrub.c (+2 -1)
···
 			      btrfs_stack_header_bytenr(header), logical);
 		return;
 	}
-	if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) {
+	if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
+		   BTRFS_FSID_SIZE) != 0) {
 		bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
 		bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
 		btrfs_warn_rl(fs_info,
fs/btrfs/volumes.c (+1 -2)
···
 		}
 	}
 
-	BUG_ON(fs_info->balance_ctl ||
-	       test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+	ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 	atomic_dec(&fs_info->balance_cancel_req);
 	mutex_unlock(&fs_info->balance_mutex);
 	return 0;