Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"Dave had a small collection of fixes to the new free space tree code,
one of which was keeping our sysfs files more up to date with feature
bits as different things get enabled (lzo, raid5/6, etc).

I should have kept the sysfs stuff for rc3, since we always manage to
trip over something. This time it was GFP_KERNEL from somewhere that
is NOFS only. Instead of rebasing it out I've put a revert in, and
we'll fix it properly for rc3.

Otherwise, Filipe fixed a btrfs DIO race and Qu Wenruo fixed up a
use-after-free in our tracepoints that Dave Jones reported"

* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Revert "btrfs: synchronize incompat feature bits with sysfs files"
btrfs: don't use GFP_HIGHMEM for free-space-tree bitmap kzalloc
btrfs: sysfs: check initialization state before updating features
Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()"
btrfs: async-thread: Fix a use-after-free error for trace
Btrfs: fix race between fsync and lockless direct IO writes
btrfs: add free space tree to the cow-only list
btrfs: add free space tree to lockdep classes
btrfs: tweak free space tree bitmap allocation
btrfs: tests: switch to GFP_KERNEL
btrfs: synchronize incompat feature bits with sysfs files
btrfs: sysfs: introduce helper for syncing bits with sysfs files
btrfs: sysfs: add free-space-tree bit attribute
btrfs: sysfs: fix typo in compat_ro attribute definition
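
The "GFP_KERNEL from somewhere that is NOFS only" problem the pull message describes is an allocation-context rule worth spelling out. A minimal sketch, assuming a hypothetical example_fs_alloc() called while fs locks are held (this is not btrfs code): under GFP_KERNEL the allocator may enter direct reclaim and recurse back into the filesystem, which can then block on the very locks the caller holds; GFP_NOFS forbids that recursion.

#include <linux/slab.h>

/*
 * Hypothetical helper (not btrfs code) called with fs locks held.
 * GFP_KERNEL here could let direct reclaim re-enter the filesystem
 * and block on the locks we already hold; GFP_NOFS excludes
 * filesystem reclaim, so the allocation cannot deadlock that way.
 */
static void *example_fs_alloc(size_t size)
{
        return kzalloc(size, GFP_NOFS);
}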

11 files changed: +113 -32
+1 -1
fs/btrfs/async-thread.c
···
                 list_add_tail(&work->ordered_list, &wq->ordered_list);
                 spin_unlock_irqrestore(&wq->list_lock, flags);
         }
-        queue_work(wq->normal_wq, &work->normal_work);
         trace_btrfs_work_queued(work);
+        queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
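
This one-line reordering is the use-after-free fix from the shortlog: once queue_work() returns, the handler may already be running on another CPU and may free the work item, so the tracepoint must not touch it afterwards. A minimal sketch of the rule, with a hypothetical self-freeing work item (example_item and friends are not btrfs names):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical self-freeing work item, not a btrfs type. */
struct example_item {
        struct work_struct work;
        int payload;
};

static void example_fn(struct work_struct *work)
{
        struct example_item *item =
                container_of(work, struct example_item, work);

        pr_info("payload %d\n", item->payload);
        kfree(item);            /* the handler frees its own work item */
}

static void example_queue(struct workqueue_struct *wq,
                          struct example_item *item)
{
        /* The last touch of @item must come before queue_work()... */
        trace_printk("queueing %p\n", item);
        /* ...because after this call @item may already be freed. */
        queue_work(wq, &item->work);
}
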
+1 -1
fs/btrfs/disk-io.c
···
         { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
         { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
         { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
+        { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
         { .id = 0, .name_stem = "tree" },
 };
···
         int again;
         struct btrfs_trans_handle *trans;
 
-        set_freezable();
         do {
                 again = 0;
+16 -2
fs/btrfs/free-space-tree.c
···
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+        void *mem;
+
+        /*
+         * The allocation size varies, observed numbers were < 4K up to 16K.
+         * Using vmalloc unconditionally would be too heavy, we'll try
+         * contiguous allocations first.
+         */
+        if (bitmap_size <= PAGE_SIZE)
+                return kzalloc(bitmap_size, GFP_NOFS);
+
+        mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+        if (mem)
+                return mem;
+
         return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
                          PAGE_KERNEL);
 }
···
         ret = 0;
 out:
-        vfree(bitmap);
+        kvfree(bitmap);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
         return ret;
···
         ret = 0;
 out:
-        vfree(bitmap);
+        kvfree(bitmap);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
         return ret;
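
Because alloc_bitmap() can now hand back either slab or vmalloc memory, every caller has to free with kvfree(), which dispatches to kfree() or vfree() as appropriate; that is what the two out: hunks change. A sketch of a caller under that contract (process_bitmap is hypothetical):

#include <linux/mm.h>

/*
 * Hypothetical caller of alloc_bitmap() above: the buffer may come
 * from the slab or from vmalloc, and kvfree() frees either kind.
 */
static int process_bitmap(u32 bitmap_size)
{
        unsigned long *bitmap = alloc_bitmap(bitmap_size);

        if (!bitmap)
                return -ENOMEM;

        /* ... fill and consume the bitmap ... */

        kvfree(bitmap);
        return 0;
}

Later kernels fold this contiguous-first-then-vmalloc fallback into kvzalloc(), but that helper did not exist yet at the time of this series.
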
+28 -8
fs/btrfs/inode.c
···
         if (ret)
                 return ERR_PTR(ret);
 
-        em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                              ins.offset, ins.offset, ins.offset, 0);
-        if (IS_ERR(em)) {
-                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-                return em;
-        }
-
+        /*
+         * Create the ordered extent before the extent map. This is to avoid
+         * races with the fast fsync path that would lead to it logging file
+         * extent items that point to disk extents that were not yet written
+         * to. The fast fsync path collects ordered extents into a local list
+         * and then collects all the new extent maps, so we must create the
+         * ordered extent first and make sure the fast fsync path collects
+         * any new ordered extents after collecting new extent maps as well.
+         * The fsync path simply can not rely on inode_dio_wait() because it
+         * causes deadlock with AIO.
+         */
         ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                            ins.offset, ins.offset, 0);
         if (ret) {
                 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-                free_extent_map(em);
                 return ERR_PTR(ret);
         }
 
+        em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                              ins.offset, ins.offset, ins.offset, 0);
+        if (IS_ERR(em)) {
+                struct btrfs_ordered_extent *oe;
+
+                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+                oe = btrfs_lookup_ordered_extent(inode, start);
+                ASSERT(oe);
+                if (WARN_ON(!oe))
+                        return em;
+                set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+                set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+                btrfs_remove_ordered_extent(inode, oe);
+                /* Once for our lookup and once for the ordered extents tree. */
+                btrfs_put_ordered_extent(oe);
+                btrfs_put_ordered_extent(oe);
+        }
         return em;
 }
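
The doubled btrfs_put_ordered_extent() in the error path is deliberate, as the in-code comment notes: the lookup returned one reference and the ordered-extent tree held another, and removal does not drop the tree's reference by itself. A generic sketch of the same one-put-per-reference rule using kref (example_entry is hypothetical, not a btrfs type):

#include <linux/kref.h>
#include <linux/slab.h>

/* Hypothetical refcounted entry: one ref for the tree, one per lookup. */
struct example_entry {
        struct kref refs;
};

static void example_release(struct kref *refs)
{
        kfree(container_of(refs, struct example_entry, refs));
}

/* Tear down an entry that a lookup just returned a reference to. */
static void example_remove(struct example_entry *entry)
{
        /* drop the lookup's reference */
        kref_put(&entry->refs, example_release);
        /* drop the reference the containing structure held */
        kref_put(&entry->refs, example_release);
}
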
+2 -1
fs/btrfs/relocation.c
···
             root_objectid == BTRFS_TREE_LOG_OBJECTID ||
             root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
             root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-            root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+            root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+            root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
                 return 1;
         return 0;
 }
+35
fs/btrfs/sysfs.c
···
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
         BTRFS_FEAT_ATTR_PTR(mixed_backref),
···
         BTRFS_FEAT_ATTR_PTR(raid56),
         BTRFS_FEAT_ATTR_PTR(skinny_metadata),
         BTRFS_FEAT_ATTR_PTR(no_holes),
+        BTRFS_FEAT_ATTR_PTR(free_space_tree),
         NULL
 };
···
 failure:
         btrfs_sysfs_remove_mounted(fs_info);
         return error;
+}
+
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+                u64 bit, enum btrfs_feature_set set)
+{
+        struct btrfs_fs_devices *fs_devs;
+        struct kobject *fsid_kobj;
+        u64 features;
+        int ret;
+
+        if (!fs_info)
+                return;
+
+        features = get_features(fs_info, set);
+        ASSERT(bit & supported_feature_masks[set]);
+
+        fs_devs = fs_info->fs_devices;
+        fsid_kobj = &fs_devs->fsid_kobj;
+
+        if (!fsid_kobj->state_initialized)
+                return;
+
+        /*
+         * FIXME: this is too heavy to update just one value, ideally we'd like
+         * to use sysfs_update_group but some refactoring is needed first.
+         */
+        sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+        ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
 }
 
 static int btrfs_init_debugfs(void)
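
A usage sketch for the new helper, with the caveat that the wiring shown here is illustrative (the series actually hooks the call into the feature-setting helpers and the free space tree creation path): after flipping a bit in the in-memory superblock, call btrfs_sysfs_feature_update() so /sys/fs/btrfs/UUID/features reflects it.

/* Illustrative only: example_enable_free_space_tree is not btrfs code. */
static void example_enable_free_space_tree(struct btrfs_fs_info *fs_info)
{
        struct btrfs_super_block *disk_super = fs_info->super_copy;
        u64 features = btrfs_super_compat_ro_flags(disk_super);

        btrfs_set_super_compat_ro_flags(disk_super,
                        features | BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE);

        /* re-sync the sysfs features group with the superblock bits */
        btrfs_sysfs_feature_update(fs_info,
                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE,
                        FEAT_COMPAT_RO);
}

Note the FIXME: recreating the whole attribute group goes through sysfs calls that allocate with GFP_KERNEL internally, so invoking this helper from NOFS transaction context appears to be exactly the "GFP_KERNEL from somewhere that is NOFS only" problem that forced the revert at the top of the series.
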
+4 -1
fs/btrfs/sysfs.h
···
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
         BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-        BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+        BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
         BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
···
                                 struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+                u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
+5 -5
fs/btrfs/tests/btrfs-tests.c
···
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
         struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-                                                GFP_NOFS);
+                                                GFP_KERNEL);
 
         if (!fs_info)
                 return fs_info;
         fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-                                      GFP_NOFS);
+                                      GFP_KERNEL);
         if (!fs_info->fs_devices) {
                 kfree(fs_info);
                 return NULL;
         }
         fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-                                      GFP_NOFS);
+                                      GFP_KERNEL);
         if (!fs_info->super_copy) {
                 kfree(fs_info->fs_devices);
                 kfree(fs_info);
···
 {
         struct btrfs_block_group_cache *cache;
 
-        cache = kzalloc(sizeof(*cache), GFP_NOFS);
+        cache = kzalloc(sizeof(*cache), GFP_KERNEL);
         if (!cache)
                 return NULL;
         cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                        GFP_NOFS);
+                                        GFP_KERNEL);
         if (!cache->free_space_ctl) {
                 kfree(cache);
                 return NULL;
+6 -6
fs/btrfs/tests/extent-io-tests.c
···
          * test.
          */
         for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-                page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+                page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
                 if (!page) {
                         test_msg("Failed to allocate test page\n");
                         ret = -ENOMEM;
···
          * |--- delalloc ---|
          * |--- search ---|
          */
-        set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+        set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
         start = 0;
         end = 0;
         found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
···
                 test_msg("Couldn't find the locked page\n");
                 goto out_bits;
         }
-        set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+        set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
         start = test_start;
         end = 0;
         found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
···
          *
          * We are re-using our test_start from above since it works out well.
          */
-        set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+        set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
         start = test_start;
         end = 0;
         found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
···
         }
         ret = 0;
 out_bits:
-        clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+        clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
         if (locked_page)
                 page_cache_release(locked_page);
···
 
         test_msg("Running extent buffer bitmap tests\n");
 
-        bitmap = kmalloc(len, GFP_NOFS);
+        bitmap = kmalloc(len, GFP_KERNEL);
         if (!bitmap) {
                 test_msg("Couldn't allocate test bitmap\n");
                 return -ENOMEM;
+4 -4
fs/btrfs/tests/inode-tests.c
···
                                (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
                                EXTENT_DELALLOC | EXTENT_DIRTY |
                                EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-                               NULL, GFP_NOFS);
+                               NULL, GFP_KERNEL);
         if (ret) {
                 test_msg("clear_extent_bit returned %d\n", ret);
                 goto out;
···
                                BTRFS_MAX_EXTENT_SIZE+8191,
                                EXTENT_DIRTY | EXTENT_DELALLOC |
                                EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                               NULL, GFP_NOFS);
+                               NULL, GFP_KERNEL);
         if (ret) {
                 test_msg("clear_extent_bit returned %d\n", ret);
                 goto out;
···
         ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                                EXTENT_DIRTY | EXTENT_DELALLOC |
                                EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                               NULL, GFP_NOFS);
+                               NULL, GFP_KERNEL);
         if (ret) {
                 test_msg("clear_extent_bit returned %d\n", ret);
                 goto out;
···
         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                          EXTENT_DIRTY | EXTENT_DELALLOC |
                          EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                         NULL, GFP_NOFS);
+                         NULL, GFP_KERNEL);
         iput(inode);
         btrfs_free_dummy_root(root);
         return ret;
+11 -3
fs/btrfs/tree-log.c
···
                                      struct inode *inode,
                                      struct btrfs_path *path,
                                      struct list_head *logged_list,
-                                     struct btrfs_log_ctx *ctx)
+                                     struct btrfs_log_ctx *ctx,
+                                     const u64 start,
+                                     const u64 end)
 {
         struct extent_map *em, *n;
         struct list_head extents;
···
         }
 
         list_sort(NULL, &extents, extent_cmp);
-
+        /*
+         * Collect any new ordered extents within the range. This is to
+         * prevent logging file extent items without waiting for the disk
+         * location they point to being written. We do this only to deal
+         * with races against concurrent lockless direct IO writes.
+         */
+        btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
         while (!list_empty(&extents)) {
                 em = list_entry(extents.next, struct extent_map, list);
···
                 goto out_unlock;
         }
         ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                        &logged_list, ctx);
+                                        &logged_list, ctx, start, end);
         if (ret) {
                 err = ret;
                 goto out_unlock;
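
The extra btrfs_get_logged_extents() call closes the fsync-vs-lockless-DIO window: after the extent maps are snapshotted, ordered extents that raced in during the snapshot are collected a second time. A generic sketch of that two-pass collect under a lock (example_log and friends are hypothetical, not btrfs types):

#include <linux/list.h>
#include <linux/spinlock.h>

/* Hypothetical shared log with a lock-protected pending list. */
struct example_log {
        spinlock_t lock;
        struct list_head pending;
};

static void example_collect(struct example_log *log, struct list_head *out)
{
        spin_lock(&log->lock);
        list_splice_init(&log->pending, out);
        spin_unlock(&log->lock);
}

static void example_sync(struct example_log *log)
{
        LIST_HEAD(batch);

        example_collect(log, &batch);   /* first pass */
        /* ... a window where concurrent writers can still add entries ... */
        example_collect(log, &batch);   /* second pass picks up late arrivals,
                                         * as btrfs_get_logged_extents() does
                                         * in the hunk above */
        /* ... now process everything in @batch ... */
}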