Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: avoid infinite GC loop due to stale atomic files

If committing atomic pages is failed when doing f2fs_do_sync_file(), we can
get commited pages but atomic_file being still set like:

- inmem: 0, atomic IO: 4 (Max. 10), volatile IO: 0 (Max. 0)

If GC selects this block, we can get an infinite loop like this:

f2fs_submit_page_bio: dev = (253,7), ino = 2, page_index = 0x2359a8, oldaddr = 0x2359a8, newaddr = 0x2359a8, rw = READ(), type = COLD_DATA
f2fs_submit_read_bio: dev = (253,7)/(253,7), rw = READ(), DATA, sector = 18533696, size = 4096
f2fs_get_victim: dev = (253,7), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 4355, cost = 1, ofs_unit = 1, pre_victim_secno = 4355, prefree = 0, free = 234
f2fs_iget: dev = (253,7), ino = 6247, pino = 5845, i_mode = 0x81b0, i_size = 319488, i_nlink = 1, i_blocks = 624, i_advise = 0x2c
f2fs_submit_page_bio: dev = (253,7), ino = 2, page_index = 0x2359a8, oldaddr = 0x2359a8, newaddr = 0x2359a8, rw = READ(), type = COLD_DATA
f2fs_submit_read_bio: dev = (253,7)/(253,7), rw = READ(), DATA, sector = 18533696, size = 4096
f2fs_get_victim: dev = (253,7), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 4355, cost = 1, ofs_unit = 1, pre_victim_secno = 4355, prefree = 0, free = 234
f2fs_iget: dev = (253,7), ino = 6247, pino = 5845, i_mode = 0x81b0, i_size = 319488, i_nlink = 1, i_blocks = 624, i_advise = 0x2c

In that moment, we can observe:

[Before]
Try to move 5084219 blocks (BG: 384508)
- data blocks : 4962373 (274483)
- node blocks : 121846 (110025)
Skipped : atomic write 4534686 (10)

[After]
Try to move 5088973 blocks (BG: 384508)
- data blocks : 4967127 (274483)
- node blocks : 121846 (110025)
Skipped : atomic write 4539440 (10)

So, refactor atomic_write flow like this:
1. start_atomic_write
- add inmem_list and set atomic_file

2. write()
- register it in inmem_pages

3. commit_atomic_write
- if no error, f2fs_drop_inmem_pages()
- f2fs_commit_inmme_pages() failed
: __revoked_inmem_pages() was done
- f2fs_do_sync_file failed
: abort_atomic_write later

4. abort_atomic_write
- f2fs_drop_inmem_pages

5. f2fs_drop_inmem_pages
- clear atomic_file
- remove inmem_list

Based on this change, when GC fails to move block in atomic_file,
f2fs_drop_inmem_pages_all() can call f2fs_drop_inmem_pages().

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

+18 -26
+10 -5
fs/f2fs/file.c
··· 1829 1829 static int f2fs_ioc_start_atomic_write(struct file *filp) 1830 1830 { 1831 1831 struct inode *inode = file_inode(filp); 1832 + struct f2fs_inode_info *fi = F2FS_I(inode); 1833 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1832 1834 int ret; 1833 1835 1834 1836 if (!inode_owner_or_capable(inode)) ··· 1873 1871 goto out; 1874 1872 } 1875 1873 1874 + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 1875 + if (list_empty(&fi->inmem_ilist)) 1876 + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); 1877 + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 1878 + 1879 + /* add inode in inmem_list first and set atomic_file */ 1876 1880 set_inode_flag(inode, FI_ATOMIC_FILE); 1877 1881 clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); 1878 1882 up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); ··· 1920 1912 goto err_out; 1921 1913 1922 1914 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 1923 - if (!ret) { 1924 - clear_inode_flag(inode, FI_ATOMIC_FILE); 1925 - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0; 1926 - stat_dec_atomic_write(inode); 1927 - } 1915 + if (!ret) 1916 + f2fs_drop_inmem_pages(inode); 1928 1917 } else { 1929 1918 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 1930 1919 }
+8 -21
fs/f2fs/segment.c
··· 185 185 186 186 void f2fs_register_inmem_page(struct inode *inode, struct page *page) 187 187 { 188 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 189 - struct f2fs_inode_info *fi = F2FS_I(inode); 190 188 struct inmem_pages *new; 191 189 192 190 f2fs_trace_pid(page); ··· 198 200 INIT_LIST_HEAD(&new->list); 199 201 200 202 /* increase reference count with clean state */ 201 - mutex_lock(&fi->inmem_lock); 202 203 get_page(page); 203 - list_add_tail(&new->list, &fi->inmem_pages); 204 - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 205 - if (list_empty(&fi->inmem_ilist)) 206 - list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); 207 - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 204 + mutex_lock(&F2FS_I(inode)->inmem_lock); 205 + list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages); 208 206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 209 - mutex_unlock(&fi->inmem_lock); 207 + mutex_unlock(&F2FS_I(inode)->inmem_lock); 210 208 211 209 trace_f2fs_register_inmem_page(page, INMEM); 212 210 } ··· 324 330 mutex_lock(&fi->inmem_lock); 325 331 __revoke_inmem_pages(inode, &fi->inmem_pages, 326 332 true, false, true); 327 - 328 - if (list_empty(&fi->inmem_pages)) { 329 - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 330 - if (!list_empty(&fi->inmem_ilist)) 331 - list_del_init(&fi->inmem_ilist); 332 - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 333 - } 334 333 mutex_unlock(&fi->inmem_lock); 335 334 } 336 335 337 336 clear_inode_flag(inode, FI_ATOMIC_FILE); 338 337 fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; 339 338 stat_dec_atomic_write(inode); 339 + 340 + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 341 + if (!list_empty(&fi->inmem_ilist)) 342 + list_del_init(&fi->inmem_ilist); 343 + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 340 344 } 341 345 342 346 void f2fs_drop_inmem_page(struct inode *inode, struct page *page) ··· 463 471 464 472 mutex_lock(&fi->inmem_lock); 465 473 err = __f2fs_commit_inmem_pages(inode); 466 - 467 - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 468 - if (!list_empty(&fi->inmem_ilist)) 469 - list_del_init(&fi->inmem_ilist); 470 - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 471 474 mutex_unlock(&fi->inmem_lock); 472 475 473 476 clear_inode_flag(inode, FI_ATOMIC_COMMIT);