Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: fix to account dirty data in __get_secs_required()

The test case in [1] triggers a system panic:

------------[ cut here ]------------
kernel BUG at fs/f2fs/segment.c:2752!
RIP: 0010:new_curseg+0xc81/0x2110
Call Trace:
f2fs_allocate_data_block+0x1c91/0x4540
do_write_page+0x163/0xdf0
f2fs_outplace_write_data+0x1aa/0x340
f2fs_do_write_data_page+0x797/0x2280
f2fs_write_single_data_page+0x16cd/0x2190
f2fs_write_cache_pages+0x994/0x1c80
f2fs_write_data_pages+0x9cc/0xea0
do_writepages+0x194/0x7a0
filemap_fdatawrite_wbc+0x12b/0x1a0
__filemap_fdatawrite_range+0xbb/0xf0
file_write_and_wait_range+0xa1/0x110
f2fs_do_sync_file+0x26f/0x1c50
f2fs_sync_file+0x12b/0x1d0
vfs_fsync_range+0xfa/0x230
do_fsync+0x3d/0x80
__x64_sys_fsync+0x37/0x50
x64_sys_call+0x1e88/0x20d0
do_syscall_64+0x4b/0x110
entry_SYSCALL_64_after_hwframe+0x76/0x7e

The root cause is that when checkpoint disabling and lfs_mode are both
on, all overwritten data is written out-of-place (OPU), which may cost
more free segments than expected. f2fs must therefore account for that
dirty data correctly when calculating the free segments that will be
consumed, and return ENOSPC earlier to avoid running out of free
segments during block allocation.

[1] https://lore.kernel.org/fstests/20241015025106.3203676-1-chao@kernel.org/

Fixes: 4354994f097d ("f2fs: checkpoint disabling")
Cc: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

authored by

Chao Yu and committed by
Jaegeuk Kim
1acd73ed b7d0a97b

+25 -10
+25 -10
fs/f2fs/segment.h
··· 561 561 } 562 562 563 563 static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, 564 - unsigned int node_blocks, unsigned int dent_blocks) 564 + unsigned int node_blocks, unsigned int data_blocks, 565 + unsigned int dent_blocks) 565 566 { 566 567 567 - unsigned segno, left_blocks; 568 + unsigned int segno, left_blocks, blocks; 568 569 int i; 569 570 570 - /* check current node sections in the worst case. */ 571 - for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { 571 + /* check current data/node sections in the worst case. */ 572 + for (i = CURSEG_HOT_DATA; i < NR_PERSISTENT_LOG; i++) { 572 573 segno = CURSEG_I(sbi, i)->segno; 573 574 left_blocks = CAP_BLKS_PER_SEC(sbi) - 574 575 get_ckpt_valid_blocks(sbi, segno, true); 575 - if (node_blocks > left_blocks) 576 + 577 + blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks; 578 + if (blocks > left_blocks) 576 579 return false; 577 580 } 578 581 ··· 589 586 } 590 587 591 588 /* 592 - * calculate needed sections for dirty node/dentry 593 - * and call has_curseg_enough_space 589 + * calculate needed sections for dirty node/dentry and call 590 + * has_curseg_enough_space, please note that, it needs to account 591 + * dirty data as well in lfs mode when checkpoint is disabled. 
594 592 */ 595 593 static inline void __get_secs_required(struct f2fs_sb_info *sbi, 596 594 unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p) ··· 600 596 get_pages(sbi, F2FS_DIRTY_DENTS) + 601 597 get_pages(sbi, F2FS_DIRTY_IMETA); 602 598 unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); 599 + unsigned int total_data_blocks = 0; 603 600 unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); 604 601 unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); 602 + unsigned int data_secs = 0; 605 603 unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); 606 604 unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); 605 + unsigned int data_blocks = 0; 606 + 607 + if (f2fs_lfs_mode(sbi) && 608 + unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { 609 + total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); 610 + data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); 611 + data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); 612 + } 607 613 608 614 if (lower_p) 609 - *lower_p = node_secs + dent_secs; 615 + *lower_p = node_secs + dent_secs + data_secs; 610 616 if (upper_p) 611 617 *upper_p = node_secs + dent_secs + 612 - (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); 618 + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + 619 + (data_blocks ? 1 : 0); 613 620 if (curseg_p) 614 621 *curseg_p = has_curseg_enough_space(sbi, 615 - node_blocks, dent_blocks); 622 + node_blocks, data_blocks, dent_blocks); 616 623 } 617 624 618 625 static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,