Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: fix NULL pointer crash when running balance and scrub concurrently

While running balance, scrub, and fsstress concurrently, we hit the
following kernel crash:

[56561.448845] BTRFS info (device sde): relocating block group 11005853696 flags 132
[56561.524077] BUG: unable to handle kernel NULL pointer dereference at 0000000000000078
[56561.524237] IP: [<ffffffffa038956d>] scrub_chunk.isra.12+0xdd/0x130 [btrfs]
[56561.524297] PGD 9be28067 PUD 7f3dd067 PMD 0
[56561.524325] Oops: 0000 [#1] SMP
[....]
[56561.527237] Call Trace:
[56561.527309] [<ffffffffa038980e>] scrub_enumerate_chunks+0x24e/0x490 [btrfs]
[56561.527392] [<ffffffff810abe00>] ? abort_exclusive_wait+0x50/0xb0
[56561.527476] [<ffffffffa038add4>] btrfs_scrub_dev+0x1a4/0x530 [btrfs]
[56561.527561] [<ffffffffa0368107>] btrfs_ioctl+0x13f7/0x2a90 [btrfs]
[56561.527639] [<ffffffff811c82f0>] do_vfs_ioctl+0x2e0/0x4c0
[56561.527712] [<ffffffff8109c384>] ? vtime_account_user+0x54/0x60
[56561.527788] [<ffffffff810f768c>] ? __audit_syscall_entry+0x9c/0xf0
[56561.527870] [<ffffffff811c8551>] SyS_ioctl+0x81/0xa0
[56561.527941] [<ffffffff815707f7>] tracesys+0xdd/0xe2
[...]
[56561.528304] RIP [<ffffffffa038956d>] scrub_chunk.isra.12+0xdd/0x130 [btrfs]
[56561.528395] RSP <ffff88004c0f5be8>
[56561.528454] CR2: 0000000000000078

This is because btrfs_relocate_chunk() frees @bdev directly (and clears
em->bdev) while scrub may still hold a reference to the extent mapping, so
scrub may dereference a NULL pointer or access freed memory.

Fix this problem by moving the freeing of @bdev into free_extent_map(), which
is reference counted, so @bdev is freed only when the last reference to the
extent map is dropped.

Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>

authored by

Wang Shilong and committed by
Chris Mason
298a8f9c ced96edc

+6 -7
+2
fs/btrfs/extent_map.c
··· 75 75 if (atomic_dec_and_test(&em->refs)) { 76 76 WARN_ON(extent_map_in_tree(em)); 77 77 WARN_ON(!list_empty(&em->list)); 78 + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) 79 + kfree(em->bdev); 78 80 kmem_cache_free(extent_map_cache, em); 79 81 } 80 82 }
+1
fs/btrfs/extent_map.h
··· 15 15 #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ 16 16 #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ 17 17 #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ 18 + #define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */ 18 19 19 20 struct extent_map { 20 21 struct rb_node rb_node;
+3 -7
fs/btrfs/volumes.c
··· 2543 2543 remove_extent_mapping(em_tree, em); 2544 2544 write_unlock(&em_tree->lock); 2545 2545 2546 - kfree(map); 2547 - em->bdev = NULL; 2548 - 2549 2546 /* once for the tree */ 2550 2547 free_extent_map(em); 2551 2548 /* once for us */ ··· 4298 4301 4299 4302 em = alloc_extent_map(); 4300 4303 if (!em) { 4304 + kfree(map); 4301 4305 ret = -ENOMEM; 4302 4306 goto error; 4303 4307 } 4308 + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); 4304 4309 em->bdev = (struct block_device *)map; 4305 4310 em->start = start; 4306 4311 em->len = num_bytes; ··· 4345 4346 /* One for the tree reference */ 4346 4347 free_extent_map(em); 4347 4348 error: 4348 - kfree(map); 4349 4349 kfree(devices_info); 4350 4350 return ret; 4351 4351 } ··· 4556 4558 write_unlock(&tree->map_tree.lock); 4557 4559 if (!em) 4558 4560 break; 4559 - kfree(em->bdev); 4560 4561 /* once for us */ 4561 4562 free_extent_map(em); 4562 4563 /* once for the tree */ ··· 5819 5822 return -ENOMEM; 5820 5823 } 5821 5824 5825 + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); 5822 5826 em->bdev = (struct block_device *)map; 5823 5827 em->start = logical; 5824 5828 em->len = length; ··· 5844 5846 map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, 5845 5847 uuid, NULL); 5846 5848 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { 5847 - kfree(map); 5848 5849 free_extent_map(em); 5849 5850 return -EIO; 5850 5851 } ··· 5851 5854 map->stripes[i].dev = 5852 5855 add_missing_dev(root, devid, uuid); 5853 5856 if (!map->stripes[i].dev) { 5854 - kfree(map); 5855 5857 free_extent_map(em); 5856 5858 return -EIO; 5857 5859 }