commit a075f23dd4b036ebaf918b3af477aa1f249ddfa0 · tjh.dev/kernel

tjh.dev / kernel

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Merge tag 'for-5.5-rc8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fix from David Sterba:
"Here's a last minute fix for a regression introduced in this
development cycle.

There's a small chance of silent corruption when a device replace and
NOCOW data writes happen at the same time in one block group. Metadata
or COW data writes are unaffected.

The extra fixup patch is there to silence an unnecessary warning."

* tag 'for-5.5-rc8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: dev-replace: remove warning for unknown return codes when finished
btrfs: scrub: Require mandatory block group RO for dev-replace

Linus Torvalds 6 years ago a075f23d 93d1a05e

+29 -9

2 changed files

expand all

unified split

btrfs

dev-replace.c

scrub.c

+1 -4

fs/btrfs/dev-replace.c

··· 500 &dev_replace->scrub_progress, 0, 1); 501 502 ret = btrfs_dev_replace_finishing(fs_info, ret); 503 - if (ret == -EINPROGRESS) { 504 ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; 505 - } else if (ret != -ECANCELED) { 506 - WARN_ON(ret); 507 - } 508 509 return ret; 510

··· 500 &dev_replace->scrub_progress, 0, 1); 501 502 ret = btrfs_dev_replace_finishing(fs_info, ret); 503 + if (ret == -EINPROGRESS) 504 ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; 505 506 return ret; 507

+28 -5

fs/btrfs/scrub.c

··· 3577 * This can easily boost the amount of SYSTEM chunks if cleaner 3578 * thread can't be triggered fast enough, and use up all space 3579 * of btrfs_super_block::sys_chunk_array 3580 */ 3581 - ret = btrfs_inc_block_group_ro(cache, false); 3582 - scrub_pause_off(fs_info); 3583 - 3584 if (ret == 0) { 3585 ro_set = 1; 3586 - } else if (ret == -ENOSPC) { 3587 /* 3588 * btrfs_inc_block_group_ro return -ENOSPC when it 3589 * failed in creating new chunk for metadata. 3590 - * It is not a problem for scrub/replace, because 3591 * metadata are always cowed, and our scrub paused 3592 * commit_transactions. 3593 */ ··· 3606 btrfs_warn(fs_info, 3607 "failed setting block group ro: %d", ret); 3608 btrfs_put_block_group(cache); 3609 break; 3610 } 3611 3612 down_write(&dev_replace->rwsem); 3613 dev_replace->cursor_right = found_key.offset + length; 3614 dev_replace->cursor_left = found_key.offset;

··· 3577 * This can easily boost the amount of SYSTEM chunks if cleaner 3578 * thread can't be triggered fast enough, and use up all space 3579 * of btrfs_super_block::sys_chunk_array 3580 + * 3581 + * While for dev replace, we need to try our best to mark block 3582 + * group RO, to prevent race between: 3583 + * - Write duplication 3584 + * Contains latest data 3585 + * - Scrub copy 3586 + * Contains data from commit tree 3587 + * 3588 + * If target block group is not marked RO, nocow writes can 3589 + * be overwritten by scrub copy, causing data corruption. 3590 + * So for dev-replace, it's not allowed to continue if a block 3591 + * group is not RO. 3592 */ 3593 + ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace); 3594 if (ret == 0) { 3595 ro_set = 1; 3596 + } else if (ret == -ENOSPC && !sctx->is_dev_replace) { 3597 /* 3598 * btrfs_inc_block_group_ro return -ENOSPC when it 3599 * failed in creating new chunk for metadata. 3600 + * It is not a problem for scrub, because 3601 * metadata are always cowed, and our scrub paused 3602 * commit_transactions. 3603 */ ··· 3596 btrfs_warn(fs_info, 3597 "failed setting block group ro: %d", ret); 3598 btrfs_put_block_group(cache); 3599 + scrub_pause_off(fs_info); 3600 break; 3601 } 3602 3603 + /* 3604 + * Now the target block is marked RO, wait for nocow writes to 3605 + * finish before dev-replace. 3606 + * COW is fine, as COW never overwrites extents in commit tree. 3607 + */ 3608 + if (sctx->is_dev_replace) { 3609 + btrfs_wait_nocow_writers(cache); 3610 + btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, 3611 + cache->length); 3612 + } 3613 + 3614 + scrub_pause_off(fs_info); 3615 down_write(&dev_replace->rwsem); 3616 dev_replace->cursor_right = found_key.offset + length; 3617 dev_replace->cursor_left = found_key.offset;