Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: change the current atomic write way

The current atomic write implementation has three major issues, described below.
- It keeps the updates in non-reclaimable memory space, and they are
hard to migrate, which is not good for contiguous memory
allocation.
- Disk space used for atomic files cannot be garbage collected, which
makes it difficult for the filesystem to be defragmented.
- If atomic write operations hit the threshold of either memory usage
or garbage collection failure count, all the atomic write operations
will fail immediately.

To resolve these issues, keep a COW inode internally for all the
updates to be flushed from memory when they need to be written out,
e.g. under high memory pressure. These COW inodes are tagged as
orphan inodes so they can be reclaimed in case of a sudden power-cut
or system failure during atomic writes.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

authored by

Daeho Jeong and committed by
Jaegeuk Kim
3db1de0e 6213f5d4

+323 -466
+115 -65
fs/f2fs/data.c
··· 69 69 70 70 if (f2fs_is_compressed_page(page)) 71 71 return false; 72 - if ((S_ISREG(inode->i_mode) && 73 - (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || 72 + if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || 74 73 page_private_gcing(page)) 75 74 return true; 76 75 return false; ··· 2562 2563 bool ipu_force = false; 2563 2564 int err = 0; 2564 2565 2565 - set_new_dnode(&dn, inode, NULL, NULL, 0); 2566 + /* Use COW inode to make dnode_of_data for atomic write */ 2567 + if (f2fs_is_atomic_file(inode)) 2568 + set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); 2569 + else 2570 + set_new_dnode(&dn, inode, NULL, NULL, 0); 2571 + 2566 2572 if (need_inplace_update(fio) && 2567 2573 f2fs_lookup_extent_cache(inode, page->index, &ei)) { 2568 2574 fio->old_blkaddr = ei.blk + page->index - ei.fofs; ··· 2604 2600 err = -EFSCORRUPTED; 2605 2601 goto out_writepage; 2606 2602 } 2603 + 2607 2604 /* 2608 2605 * If current allocation needs SSR, 2609 2606 * it had better in-place writes for updated data. 
··· 3318 3313 return err; 3319 3314 } 3320 3315 3316 + static int __find_data_block(struct inode *inode, pgoff_t index, 3317 + block_t *blk_addr) 3318 + { 3319 + struct dnode_of_data dn; 3320 + struct page *ipage; 3321 + struct extent_info ei = {0, }; 3322 + int err = 0; 3323 + 3324 + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); 3325 + if (IS_ERR(ipage)) 3326 + return PTR_ERR(ipage); 3327 + 3328 + set_new_dnode(&dn, inode, ipage, ipage, 0); 3329 + 3330 + if (f2fs_lookup_extent_cache(inode, index, &ei)) { 3331 + dn.data_blkaddr = ei.blk + index - ei.fofs; 3332 + } else { 3333 + /* hole case */ 3334 + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 3335 + if (err) { 3336 + dn.data_blkaddr = NULL_ADDR; 3337 + err = 0; 3338 + } 3339 + } 3340 + *blk_addr = dn.data_blkaddr; 3341 + f2fs_put_dnode(&dn); 3342 + return err; 3343 + } 3344 + 3345 + static int __reserve_data_block(struct inode *inode, pgoff_t index, 3346 + block_t *blk_addr, bool *node_changed) 3347 + { 3348 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3349 + struct dnode_of_data dn; 3350 + struct page *ipage; 3351 + int err = 0; 3352 + 3353 + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); 3354 + 3355 + ipage = f2fs_get_node_page(sbi, inode->i_ino); 3356 + if (IS_ERR(ipage)) { 3357 + err = PTR_ERR(ipage); 3358 + goto unlock_out; 3359 + } 3360 + set_new_dnode(&dn, inode, ipage, ipage, 0); 3361 + 3362 + err = f2fs_get_block(&dn, index); 3363 + 3364 + *blk_addr = dn.data_blkaddr; 3365 + *node_changed = dn.node_changed; 3366 + f2fs_put_dnode(&dn); 3367 + 3368 + unlock_out: 3369 + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); 3370 + return err; 3371 + } 3372 + 3373 + static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, 3374 + struct page *page, loff_t pos, unsigned int len, 3375 + block_t *blk_addr, bool *node_changed) 3376 + { 3377 + struct inode *inode = page->mapping->host; 3378 + struct inode *cow_inode = F2FS_I(inode)->cow_inode; 3379 + pgoff_t index = 
page->index; 3380 + int err = 0; 3381 + block_t ori_blk_addr; 3382 + 3383 + /* If pos is beyond the end of file, reserve a new block in COW inode */ 3384 + if ((pos & PAGE_MASK) >= i_size_read(inode)) 3385 + return __reserve_data_block(cow_inode, index, blk_addr, 3386 + node_changed); 3387 + 3388 + /* Look for the block in COW inode first */ 3389 + err = __find_data_block(cow_inode, index, blk_addr); 3390 + if (err) 3391 + return err; 3392 + else if (*blk_addr != NULL_ADDR) 3393 + return 0; 3394 + 3395 + /* Look for the block in the original inode */ 3396 + err = __find_data_block(inode, index, &ori_blk_addr); 3397 + if (err) 3398 + return err; 3399 + 3400 + /* Finally, we should reserve a new block in COW inode for the update */ 3401 + err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); 3402 + if (err) 3403 + return err; 3404 + 3405 + if (ori_blk_addr != NULL_ADDR) 3406 + *blk_addr = ori_blk_addr; 3407 + return 0; 3408 + } 3409 + 3321 3410 static int f2fs_write_begin(struct file *file, struct address_space *mapping, 3322 3411 loff_t pos, unsigned len, unsigned flags, 3323 3412 struct page **pagep, void **fsdata) ··· 3420 3321 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3421 3322 struct page *page = NULL; 3422 3323 pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; 3423 - bool need_balance = false, drop_atomic = false; 3324 + bool need_balance = false; 3424 3325 block_t blkaddr = NULL_ADDR; 3425 3326 int err = 0; 3426 3327 ··· 3428 3329 3429 3330 if (!f2fs_is_checkpoint_ready(sbi)) { 3430 3331 err = -ENOSPC; 3431 - goto fail; 3432 - } 3433 - 3434 - if ((f2fs_is_atomic_file(inode) && 3435 - !f2fs_available_free_memory(sbi, INMEM_PAGES)) || 3436 - is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { 3437 - err = -ENOMEM; 3438 - drop_atomic = true; 3439 3332 goto fail; 3440 3333 } 3441 3334 ··· 3478 3387 3479 3388 *pagep = page; 3480 3389 3481 - err = prepare_write_begin(sbi, page, pos, len, 3390 + if (f2fs_is_atomic_file(inode)) 3391 + err 
= prepare_atomic_write_begin(sbi, page, pos, len, 3392 + &blkaddr, &need_balance); 3393 + else 3394 + err = prepare_write_begin(sbi, page, pos, len, 3482 3395 &blkaddr, &need_balance); 3483 3396 if (err) 3484 3397 goto fail; ··· 3538 3443 fail: 3539 3444 f2fs_put_page(page, 1); 3540 3445 f2fs_write_failed(inode, pos + len); 3541 - if (drop_atomic) 3542 - f2fs_drop_inmem_pages_all(sbi, false); 3543 3446 return err; 3544 3447 } 3545 3448 ··· 3581 3488 set_page_dirty(page); 3582 3489 3583 3490 if (pos + copied > i_size_read(inode) && 3584 - !f2fs_verity_in_progress(inode)) 3491 + !f2fs_verity_in_progress(inode)) { 3585 3492 f2fs_i_size_write(inode, pos + copied); 3493 + if (f2fs_is_atomic_file(inode)) 3494 + f2fs_i_size_write(F2FS_I(inode)->cow_inode, 3495 + pos + copied); 3496 + } 3586 3497 unlock_out: 3587 3498 f2fs_put_page(page, 1); 3588 3499 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); ··· 3619 3522 inode->i_ino == F2FS_COMPRESS_INO(sbi)) 3620 3523 clear_page_private_data(&folio->page); 3621 3524 3622 - if (page_private_atomic(&folio->page)) 3623 - return f2fs_drop_inmem_page(inode, &folio->page); 3624 - 3625 3525 folio_detach_private(folio); 3626 3526 } 3627 3527 ··· 3626 3532 { 3627 3533 /* If this is dirty page, keep PagePrivate */ 3628 3534 if (PageDirty(page)) 3629 - return 0; 3630 - 3631 - /* This is atomic written page, keep Private */ 3632 - if (page_private_atomic(page)) 3633 3535 return 0; 3634 3536 3635 3537 if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) { ··· 3652 3562 if (!folio_test_uptodate(folio)) 3653 3563 folio_mark_uptodate(folio); 3654 3564 BUG_ON(folio_test_swapcache(folio)); 3655 - 3656 - if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { 3657 - if (!page_private_atomic(&folio->page)) { 3658 - f2fs_register_inmem_page(inode, &folio->page); 3659 - return true; 3660 - } 3661 - /* 3662 - * Previously, this page has been registered, we just 3663 - * return here. 
3664 - */ 3665 - return false; 3666 - } 3667 3565 3668 3566 if (!folio_test_dirty(folio)) { 3669 3567 filemap_dirty_folio(mapping, folio); ··· 3732 3654 int f2fs_migrate_page(struct address_space *mapping, 3733 3655 struct page *newpage, struct page *page, enum migrate_mode mode) 3734 3656 { 3735 - int rc, extra_count; 3736 - struct f2fs_inode_info *fi = F2FS_I(mapping->host); 3737 - bool atomic_written = page_private_atomic(page); 3657 + int rc, extra_count = 0; 3738 3658 3739 3659 BUG_ON(PageWriteback(page)); 3740 3660 3741 - /* migrating an atomic written page is safe with the inmem_lock hold */ 3742 - if (atomic_written) { 3743 - if (mode != MIGRATE_SYNC) 3744 - return -EBUSY; 3745 - if (!mutex_trylock(&fi->inmem_lock)) 3746 - return -EAGAIN; 3747 - } 3748 - 3749 - /* one extra reference was held for atomic_write page */ 3750 - extra_count = atomic_written ? 1 : 0; 3751 3661 rc = migrate_page_move_mapping(mapping, newpage, 3752 3662 page, extra_count); 3753 - if (rc != MIGRATEPAGE_SUCCESS) { 3754 - if (atomic_written) 3755 - mutex_unlock(&fi->inmem_lock); 3663 + if (rc != MIGRATEPAGE_SUCCESS) 3756 3664 return rc; 3757 - } 3758 - 3759 - if (atomic_written) { 3760 - struct inmem_pages *cur; 3761 - 3762 - list_for_each_entry(cur, &fi->inmem_pages, list) 3763 - if (cur->page == page) { 3764 - cur->page = newpage; 3765 - break; 3766 - } 3767 - mutex_unlock(&fi->inmem_lock); 3768 - put_page(page); 3769 - get_page(newpage); 3770 - } 3771 3665 3772 3666 /* guarantee to start from no stale private field */ 3773 3667 set_page_private(newpage, 0);
+2 -10
fs/f2fs/debug.c
··· 91 91 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 92 92 si->nquota_files = sbi->nquota_files; 93 93 si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; 94 - si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 95 94 si->aw_cnt = sbi->atomic_files; 96 95 si->vw_cnt = atomic_read(&sbi->vw_cnt); 97 96 si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); ··· 166 167 si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; 167 168 si->io_skip_bggc = sbi->io_skip_bggc; 168 169 si->other_skip_bggc = sbi->other_skip_bggc; 169 - si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; 170 - si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC]; 171 170 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) 172 171 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) 173 172 / 2; ··· 293 296 sizeof(struct nat_entry); 294 297 si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] * 295 298 sizeof(struct nat_entry_set); 296 - si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); 297 299 for (i = 0; i < MAX_INO_ENTRY; i++) 298 300 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 299 301 si->cache_mem += atomic_read(&sbi->total_ext_tree) * ··· 487 491 si->bg_data_blks); 488 492 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, 489 493 si->bg_node_blks); 490 - seq_printf(s, "Skipped : atomic write %llu (%llu)\n", 491 - si->skipped_atomic_files[BG_GC] + 492 - si->skipped_atomic_files[FG_GC], 493 - si->skipped_atomic_files[BG_GC]); 494 494 seq_printf(s, "BG skip : IO: %u, Other: %u\n", 495 495 si->io_skip_bggc, si->other_skip_bggc); 496 496 seq_puts(s, "\nExtent Cache:\n"); ··· 511 519 si->flush_list_empty, 512 520 si->nr_discarding, si->nr_discarded, 513 521 si->nr_discard_cmd, si->undiscard_blks); 514 - seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " 522 + seq_printf(s, " - atomic IO: %4d (Max. %4d), " 515 523 "volatile IO: %4d (Max. 
%4d)\n", 516 - si->inmem_pages, si->aw_cnt, si->max_aw_cnt, 524 + si->aw_cnt, si->max_aw_cnt, 517 525 si->vw_cnt, si->max_vw_cnt); 518 526 seq_printf(s, " - compress: %4d, hit:%8d\n", si->compress_pages, si->compress_page_hit); 519 527 seq_printf(s, " - nodes: %4d in %4d\n",
+7 -26
fs/f2fs/f2fs.h
··· 716 716 717 717 enum { 718 718 GC_FAILURE_PIN, 719 - GC_FAILURE_ATOMIC, 720 719 MAX_GC_FAILURE 721 720 }; 722 721 ··· 737 738 FI_UPDATE_WRITE, /* inode has in-place-update data */ 738 739 FI_NEED_IPU, /* used for ipu per file */ 739 740 FI_ATOMIC_FILE, /* indicate atomic file */ 740 - FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ 741 741 FI_VOLATILE_FILE, /* indicate volatile file */ 742 742 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ 743 743 FI_DROP_CACHE, /* drop dirty page cache */ ··· 750 752 FI_EXTRA_ATTR, /* indicate file has extra attribute */ 751 753 FI_PROJ_INHERIT, /* indicate file inherits projectid */ 752 754 FI_PIN_FILE, /* indicate file should not be gced */ 753 - FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ 754 755 FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ 755 756 FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ 756 757 FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */ ··· 791 794 #endif 792 795 struct list_head dirty_list; /* dirty list for dirs and files */ 793 796 struct list_head gdirty_list; /* linked in global dirty list */ 794 - struct list_head inmem_ilist; /* list for inmem inodes */ 795 - struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 796 - struct task_struct *inmem_task; /* store inmemory task */ 797 - struct mutex inmem_lock; /* lock for inmemory pages */ 797 + struct task_struct *atomic_write_task; /* store atomic write task */ 798 798 struct extent_tree *extent_tree; /* cached extent_tree entry */ 799 + struct inode *cow_inode; /* copy-on-write inode for atomic write */ 799 800 800 801 /* avoid racing between foreground op and gc */ 801 802 struct f2fs_rwsem i_gc_rwsem[2]; ··· 1087 1092 F2FS_DIRTY_QDATA, 1088 1093 F2FS_DIRTY_NODES, 1089 1094 F2FS_DIRTY_META, 1090 - F2FS_INMEM_PAGES, 1091 1095 F2FS_DIRTY_IMETA, 1092 1096 F2FS_WB_CP_DATA, 1093 1097 F2FS_WB_DATA, ··· 1116 1122 META, 1117 1123 NR_PAGE_TYPE, 
1118 1124 META_FLUSH, 1119 - INMEM, /* the below types are used by tracepoints only. */ 1120 - INMEM_DROP, 1121 - INMEM_INVALIDATE, 1122 - INMEM_REVOKE, 1123 - IPU, 1125 + IPU, /* the below types are used by tracepoints only. */ 1124 1126 OPU, 1125 1127 }; 1126 1128 ··· 1708 1718 1709 1719 /* for skip statistic */ 1710 1720 unsigned int atomic_files; /* # of opened atomic file */ 1711 - unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ 1712 1721 unsigned long long skipped_gc_rwsem; /* FG_GC only */ 1713 1722 1714 1723 /* threshold for gc trials on pinned files */ ··· 3191 3202 return is_inode_flag_set(inode, FI_ATOMIC_FILE); 3192 3203 } 3193 3204 3194 - static inline bool f2fs_is_commit_atomic_write(struct inode *inode) 3195 - { 3196 - return is_inode_flag_set(inode, FI_ATOMIC_COMMIT); 3197 - } 3198 - 3199 3205 static inline bool f2fs_is_volatile_file(struct inode *inode) 3200 3206 { 3201 3207 return is_inode_flag_set(inode, FI_VOLATILE_FILE); ··· 3428 3444 int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, 3429 3445 bool hot, bool set); 3430 3446 struct dentry *f2fs_get_parent(struct dentry *child); 3447 + int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, 3448 + struct inode **new_inode); 3431 3449 3432 3450 /* 3433 3451 * dir.c ··· 3565 3579 * segment.c 3566 3580 */ 3567 3581 bool f2fs_need_SSR(struct f2fs_sb_info *sbi); 3568 - void f2fs_register_inmem_page(struct inode *inode, struct page *page); 3569 - void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure); 3570 - void f2fs_drop_inmem_pages(struct inode *inode); 3571 - void f2fs_drop_inmem_page(struct inode *inode, struct page *page); 3572 - int f2fs_commit_inmem_pages(struct inode *inode); 3582 + int f2fs_commit_atomic_write(struct inode *inode); 3583 + void f2fs_abort_atomic_write(struct inode *inode, bool clean); 3573 3584 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); 3574 3585 void f2fs_balance_fs_bg(struct 
f2fs_sb_info *sbi, bool from_bg); 3575 3586 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); ··· 3798 3815 int ext_tree, zombie_tree, ext_node; 3799 3816 int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; 3800 3817 int ndirty_data, ndirty_qdata; 3801 - int inmem_pages; 3802 3818 unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; 3803 3819 int nats, dirty_nats, sits, dirty_sits; 3804 3820 int free_nids, avail_nids, alloc_nids; ··· 3827 3845 int bg_node_segs, bg_data_segs; 3828 3846 int tot_blks, data_blks, node_blks; 3829 3847 int bg_data_blks, bg_node_blks; 3830 - unsigned long long skipped_atomic_files[2]; 3831 3848 int curseg[NR_CURSEG_TYPE]; 3832 3849 int cursec[NR_CURSEG_TYPE]; 3833 3850 int curzone[NR_CURSEG_TYPE];
+27 -22
fs/f2fs/file.c
··· 1813 1813 atomic_read(&inode->i_writecount) != 1) 1814 1814 return 0; 1815 1815 1816 - /* some remained atomic pages should discarded */ 1817 1816 if (f2fs_is_atomic_file(inode)) 1818 - f2fs_drop_inmem_pages(inode); 1817 + f2fs_abort_atomic_write(inode, true); 1819 1818 if (f2fs_is_volatile_file(inode)) { 1820 1819 set_inode_flag(inode, FI_DROP_CACHE); 1821 1820 filemap_fdatawrite(inode->i_mapping); ··· 1836 1837 * before dropping file lock, it needs to do in ->flush. 1837 1838 */ 1838 1839 if (f2fs_is_atomic_file(inode) && 1839 - F2FS_I(inode)->inmem_task == current) 1840 - f2fs_drop_inmem_pages(inode); 1840 + F2FS_I(inode)->atomic_write_task == current) 1841 + f2fs_abort_atomic_write(inode, true); 1841 1842 return 0; 1842 1843 } 1843 1844 ··· 2000 2001 struct user_namespace *mnt_userns = file_mnt_user_ns(filp); 2001 2002 struct f2fs_inode_info *fi = F2FS_I(inode); 2002 2003 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2004 + struct inode *pinode; 2003 2005 int ret; 2004 2006 2005 2007 if (!inode_owner_or_capable(mnt_userns, inode)) ··· 2023 2023 goto out; 2024 2024 } 2025 2025 2026 - if (f2fs_is_atomic_file(inode)) { 2027 - if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) 2028 - ret = -EINVAL; 2026 + if (f2fs_is_atomic_file(inode)) 2029 2027 goto out; 2030 - } 2031 2028 2032 2029 ret = f2fs_convert_inline_inode(inode); 2033 2030 if (ret) ··· 2045 2048 goto out; 2046 2049 } 2047 2050 2051 + /* Create a COW inode for atomic write */ 2052 + pinode = f2fs_iget(inode->i_sb, fi->i_pino); 2053 + if (IS_ERR(pinode)) { 2054 + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 2055 + ret = PTR_ERR(pinode); 2056 + goto out; 2057 + } 2058 + 2059 + ret = f2fs_get_tmpfile(mnt_userns, pinode, &fi->cow_inode); 2060 + iput(pinode); 2061 + if (ret) { 2062 + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 2063 + goto out; 2064 + } 2065 + f2fs_i_size_write(fi->cow_inode, i_size_read(inode)); 2066 + 2048 2067 spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 2049 - if 
(list_empty(&fi->inmem_ilist)) 2050 - list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); 2051 2068 sbi->atomic_files++; 2052 2069 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 2053 2070 2054 - /* add inode in inmem_list first and set atomic_file */ 2055 2071 set_inode_flag(inode, FI_ATOMIC_FILE); 2056 - clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); 2072 + set_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); 2073 + clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); 2057 2074 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 2058 2075 2059 2076 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2060 - F2FS_I(inode)->inmem_task = current; 2077 + F2FS_I(inode)->atomic_write_task = current; 2061 2078 stat_update_max_atomic_write(inode); 2062 2079 out: 2063 2080 inode_unlock(inode); ··· 2102 2091 } 2103 2092 2104 2093 if (f2fs_is_atomic_file(inode)) { 2105 - ret = f2fs_commit_inmem_pages(inode); 2094 + ret = f2fs_commit_atomic_write(inode); 2106 2095 if (ret) 2107 2096 goto err_out; 2108 2097 2109 2098 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2110 2099 if (!ret) 2111 - f2fs_drop_inmem_pages(inode); 2100 + f2fs_abort_atomic_write(inode, false); 2112 2101 } else { 2113 2102 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); 2114 2103 } 2115 2104 err_out: 2116 - if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { 2117 - clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); 2118 - ret = -EINVAL; 2119 - } 2120 2105 inode_unlock(inode); 2121 2106 mnt_drop_write_file(filp); 2122 2107 return ret; ··· 2200 2193 inode_lock(inode); 2201 2194 2202 2195 if (f2fs_is_atomic_file(inode)) 2203 - f2fs_drop_inmem_pages(inode); 2196 + f2fs_abort_atomic_write(inode, true); 2204 2197 if (f2fs_is_volatile_file(inode)) { 2205 2198 clear_inode_flag(inode, FI_VOLATILE_FILE); 2206 2199 stat_dec_volatile_write(inode); 2207 2200 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 2208 2201 } 2209 - 2210 - clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); 2211 2202 2212 2203 
inode_unlock(inode); 2213 2204
+1 -26
fs/f2fs/gc.c
··· 1245 1245 goto out; 1246 1246 } 1247 1247 1248 - if (f2fs_is_atomic_file(inode)) { 1249 - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; 1250 - F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; 1251 - err = -EAGAIN; 1252 - goto out; 1253 - } 1254 - 1255 1248 err = f2fs_gc_pinned_control(inode, gc_type, segno); 1256 1249 if (err) 1257 1250 goto out; ··· 1386 1393 goto out; 1387 1394 } 1388 1395 1389 - if (f2fs_is_atomic_file(inode)) { 1390 - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; 1391 - F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; 1392 - err = -EAGAIN; 1393 - goto out; 1394 - } 1395 1396 err = f2fs_gc_pinned_control(inode, gc_type, segno); 1396 1397 if (err) 1397 1398 goto out; ··· 1752 1765 .ilist = LIST_HEAD_INIT(gc_list.ilist), 1753 1766 .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), 1754 1767 }; 1755 - unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC]; 1756 - unsigned long long first_skipped; 1757 1768 unsigned int skipped_round = 0, round = 0; 1758 1769 1759 1770 trace_f2fs_gc_begin(sbi->sb, sync, background, ··· 1765 1780 1766 1781 cpc.reason = __get_cp_reason(sbi); 1767 1782 sbi->skipped_gc_rwsem = 0; 1768 - first_skipped = last_skipped; 1769 1783 gc_more: 1770 1784 if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) { 1771 1785 ret = -EINVAL; ··· 1815 1831 total_freed += seg_freed; 1816 1832 1817 1833 if (gc_type == FG_GC) { 1818 - if (sbi->skipped_atomic_files[FG_GC] > last_skipped || 1819 - sbi->skipped_gc_rwsem) 1834 + if (sbi->skipped_gc_rwsem) 1820 1835 skipped_round++; 1821 - last_skipped = sbi->skipped_atomic_files[FG_GC]; 1822 1836 round++; 1823 1837 } 1824 1838 ··· 1839 1857 if (ret) 1840 1858 goto stop; 1841 1859 } 1842 - segno = NULL_SEGNO; 1843 - goto gc_more; 1844 - } 1845 - if (first_skipped < last_skipped && 1846 - (last_skipped - first_skipped) > 1847 - sbi->skipped_gc_rwsem) { 1848 - f2fs_drop_inmem_pages_all(sbi, true); 1849 1860 segno = NULL_SEGNO; 1850 1861 goto gc_more; 1851 1862 }
+1 -2
fs/f2fs/inode.c
··· 745 745 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 746 746 int err = 0; 747 747 748 - /* some remained atomic pages should discarded */ 749 748 if (f2fs_is_atomic_file(inode)) 750 - f2fs_drop_inmem_pages(inode); 749 + f2fs_abort_atomic_write(inode, true); 751 750 752 751 trace_f2fs_evict_inode(inode); 753 752 truncate_inode_pages_final(&inode->i_data);
+19 -9
fs/f2fs/namei.c
··· 840 840 } 841 841 842 842 static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, 843 - struct dentry *dentry, umode_t mode, 844 - struct inode **whiteout) 843 + struct dentry *dentry, umode_t mode, bool is_whiteout, 844 + struct inode **new_inode) 845 845 { 846 846 struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 847 847 struct inode *inode; ··· 855 855 if (IS_ERR(inode)) 856 856 return PTR_ERR(inode); 857 857 858 - if (whiteout) { 858 + if (is_whiteout) { 859 859 init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); 860 860 inode->i_op = &f2fs_special_inode_operations; 861 861 } else { ··· 880 880 f2fs_add_orphan_inode(inode); 881 881 f2fs_alloc_nid_done(sbi, inode->i_ino); 882 882 883 - if (whiteout) { 883 + if (is_whiteout) { 884 884 f2fs_i_links_write(inode, false); 885 885 886 886 spin_lock(&inode->i_lock); 887 887 inode->i_state |= I_LINKABLE; 888 888 spin_unlock(&inode->i_lock); 889 - 890 - *whiteout = inode; 891 889 } else { 892 - d_tmpfile(dentry, inode); 890 + if (dentry) 891 + d_tmpfile(dentry, inode); 892 + else 893 + f2fs_i_links_write(inode, false); 893 894 } 894 895 /* link_count was changed by d_tmpfile as well. 
*/ 895 896 f2fs_unlock_op(sbi); 896 897 unlock_new_inode(inode); 898 + 899 + if (new_inode) 900 + *new_inode = inode; 897 901 898 902 f2fs_balance_fs(sbi, true); 899 903 return 0; ··· 919 915 if (!f2fs_is_checkpoint_ready(sbi)) 920 916 return -ENOSPC; 921 917 922 - return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL); 918 + return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, false, NULL); 923 919 } 924 920 925 921 static int f2fs_create_whiteout(struct user_namespace *mnt_userns, ··· 929 925 return -EIO; 930 926 931 927 return __f2fs_tmpfile(mnt_userns, dir, NULL, 932 - S_IFCHR | WHITEOUT_MODE, whiteout); 928 + S_IFCHR | WHITEOUT_MODE, true, whiteout); 929 + } 930 + 931 + int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, 932 + struct inode **new_inode) 933 + { 934 + return __f2fs_tmpfile(mnt_userns, dir, NULL, S_IFREG, false, new_inode); 933 935 } 934 936 935 937 static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
-4
fs/f2fs/node.c
··· 90 90 atomic_read(&sbi->total_ext_node) * 91 91 sizeof(struct extent_node)) >> PAGE_SHIFT; 92 92 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 93 - } else if (type == INMEM_PAGES) { 94 - /* it allows 20% / total_ram for inmemory pages */ 95 - mem_size = get_pages(sbi, F2FS_INMEM_PAGES); 96 - res = mem_size < (val.totalram / 5); 97 93 } else if (type == DISCARD_CACHE) { 98 94 mem_size = (atomic_read(&dcc->discard_cmd_cnt) * 99 95 sizeof(struct discard_cmd)) >> PAGE_SHIFT;
-1
fs/f2fs/node.h
··· 147 147 DIRTY_DENTS, /* indicates dirty dentry pages */ 148 148 INO_ENTRIES, /* indicates inode entries */ 149 149 EXTENT_CACHE, /* indicates extent cache */ 150 - INMEM_PAGES, /* indicates inmemory pages */ 151 150 DISCARD_CACHE, /* indicates memory of cached discard cmds */ 152 151 COMPRESS_PAGE, /* indicates memory of cached compressed pages */ 153 152 BASE_CHECK, /* check kernel status */
+148 -272
fs/f2fs/segment.c
··· 30 30 static struct kmem_cache *discard_entry_slab; 31 31 static struct kmem_cache *discard_cmd_slab; 32 32 static struct kmem_cache *sit_entry_set_slab; 33 - static struct kmem_cache *inmem_entry_slab; 33 + static struct kmem_cache *revoke_entry_slab; 34 34 35 35 static unsigned long __reverse_ulong(unsigned char *str) 36 36 { ··· 185 185 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); 186 186 } 187 187 188 - void f2fs_register_inmem_page(struct inode *inode, struct page *page) 189 - { 190 - struct inmem_pages *new; 191 - 192 - set_page_private_atomic(page); 193 - 194 - new = f2fs_kmem_cache_alloc(inmem_entry_slab, 195 - GFP_NOFS, true, NULL); 196 - 197 - /* add atomic page indices to the list */ 198 - new->page = page; 199 - INIT_LIST_HEAD(&new->list); 200 - 201 - /* increase reference count with clean state */ 202 - get_page(page); 203 - mutex_lock(&F2FS_I(inode)->inmem_lock); 204 - list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages); 205 - inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 206 - mutex_unlock(&F2FS_I(inode)->inmem_lock); 207 - 208 - trace_f2fs_register_inmem_page(page, INMEM); 209 - } 210 - 211 - static int __revoke_inmem_pages(struct inode *inode, 212 - struct list_head *head, bool drop, bool recover, 213 - bool trylock) 188 + void f2fs_abort_atomic_write(struct inode *inode, bool clean) 214 189 { 215 190 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 216 - struct inmem_pages *cur, *tmp; 217 - int err = 0; 191 + struct f2fs_inode_info *fi = F2FS_I(inode); 192 + 193 + if (f2fs_is_atomic_file(inode)) { 194 + if (clean) 195 + truncate_inode_pages_final(inode->i_mapping); 196 + clear_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); 197 + iput(fi->cow_inode); 198 + fi->cow_inode = NULL; 199 + clear_inode_flag(inode, FI_ATOMIC_FILE); 200 + 201 + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 202 + sbi->atomic_files--; 203 + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 204 + } 205 + } 206 + 207 + static int __replace_atomic_write_block(struct inode 
*inode, pgoff_t index, 208 + block_t new_addr, block_t *old_addr, bool recover) 209 + { 210 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 211 + struct dnode_of_data dn; 212 + struct node_info ni; 213 + int err; 214 + 215 + retry: 216 + set_new_dnode(&dn, inode, NULL, NULL, 0); 217 + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA); 218 + if (err) { 219 + if (err == -ENOMEM) { 220 + f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); 221 + goto retry; 222 + } 223 + return err; 224 + } 225 + 226 + err = f2fs_get_node_info(sbi, dn.nid, &ni, false); 227 + if (err) { 228 + f2fs_put_dnode(&dn); 229 + return err; 230 + } 231 + 232 + if (recover) { 233 + /* dn.data_blkaddr is always valid */ 234 + if (!__is_valid_data_blkaddr(new_addr)) { 235 + if (new_addr == NULL_ADDR) 236 + dec_valid_block_count(sbi, inode, 1); 237 + f2fs_invalidate_blocks(sbi, dn.data_blkaddr); 238 + f2fs_update_data_blkaddr(&dn, new_addr); 239 + } else { 240 + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 241 + new_addr, ni.version, true, true); 242 + } 243 + } else { 244 + blkcnt_t count = 1; 245 + 246 + *old_addr = dn.data_blkaddr; 247 + f2fs_truncate_data_blocks_range(&dn, 1); 248 + dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count); 249 + inc_valid_block_count(sbi, inode, &count); 250 + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, 251 + ni.version, true, false); 252 + } 253 + 254 + f2fs_put_dnode(&dn); 255 + return 0; 256 + } 257 + 258 + static void __complete_revoke_list(struct inode *inode, struct list_head *head, 259 + bool revoke) 260 + { 261 + struct revoke_entry *cur, *tmp; 218 262 219 263 list_for_each_entry_safe(cur, tmp, head, list) { 220 - struct page *page = cur->page; 221 - 222 - if (drop) 223 - trace_f2fs_commit_inmem_page(page, INMEM_DROP); 224 - 225 - if (trylock) { 226 - /* 227 - * to avoid deadlock in between page lock and 228 - * inmem_lock. 
229 - */ 230 - if (!trylock_page(page)) 231 - continue; 232 - } else { 233 - lock_page(page); 234 - } 235 - 236 - f2fs_wait_on_page_writeback(page, DATA, true, true); 237 - 238 - if (recover) { 239 - struct dnode_of_data dn; 240 - struct node_info ni; 241 - 242 - trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); 243 - retry: 244 - set_new_dnode(&dn, inode, NULL, NULL, 0); 245 - err = f2fs_get_dnode_of_data(&dn, page->index, 246 - LOOKUP_NODE); 247 - if (err) { 248 - if (err == -ENOMEM) { 249 - memalloc_retry_wait(GFP_NOFS); 250 - goto retry; 251 - } 252 - err = -EAGAIN; 253 - goto next; 254 - } 255 - 256 - err = f2fs_get_node_info(sbi, dn.nid, &ni, false); 257 - if (err) { 258 - f2fs_put_dnode(&dn); 259 - return err; 260 - } 261 - 262 - if (cur->old_addr == NEW_ADDR) { 263 - f2fs_invalidate_blocks(sbi, dn.data_blkaddr); 264 - f2fs_update_data_blkaddr(&dn, NEW_ADDR); 265 - } else 266 - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, 267 - cur->old_addr, ni.version, true, true); 268 - f2fs_put_dnode(&dn); 269 - } 270 - next: 271 - /* we don't need to invalidate this in the sccessful status */ 272 - if (drop || recover) { 273 - ClearPageUptodate(page); 274 - clear_page_private_gcing(page); 275 - } 276 - detach_page_private(page); 277 - set_page_private(page, 0); 278 - f2fs_put_page(page, 1); 279 - 264 + if (revoke) 265 + __replace_atomic_write_block(inode, cur->index, 266 + cur->old_addr, NULL, true); 280 267 list_del(&cur->list); 281 - kmem_cache_free(inmem_entry_slab, cur); 282 - dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 268 + kmem_cache_free(revoke_entry_slab, cur); 283 269 } 284 - return err; 285 270 } 286 271 287 - void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) 288 - { 289 - struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; 290 - struct inode *inode; 291 - struct f2fs_inode_info *fi; 292 - unsigned int count = sbi->atomic_files; 293 - unsigned int looped = 0; 294 - next: 295 - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 296 
- if (list_empty(head)) { 297 - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 298 - return; 299 - } 300 - fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); 301 - inode = igrab(&fi->vfs_inode); 302 - if (inode) 303 - list_move_tail(&fi->inmem_ilist, head); 304 - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 305 - 306 - if (inode) { 307 - if (gc_failure) { 308 - if (!fi->i_gc_failures[GC_FAILURE_ATOMIC]) 309 - goto skip; 310 - } 311 - set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); 312 - f2fs_drop_inmem_pages(inode); 313 - skip: 314 - iput(inode); 315 - } 316 - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); 317 - if (gc_failure) { 318 - if (++looped >= count) 319 - return; 320 - } 321 - goto next; 322 - } 323 - 324 - void f2fs_drop_inmem_pages(struct inode *inode) 272 + static int __f2fs_commit_atomic_write(struct inode *inode) 325 273 { 326 274 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 327 275 struct f2fs_inode_info *fi = F2FS_I(inode); 328 - 329 - do { 330 - mutex_lock(&fi->inmem_lock); 331 - if (list_empty(&fi->inmem_pages)) { 332 - fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; 333 - 334 - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); 335 - if (!list_empty(&fi->inmem_ilist)) 336 - list_del_init(&fi->inmem_ilist); 337 - if (f2fs_is_atomic_file(inode)) { 338 - clear_inode_flag(inode, FI_ATOMIC_FILE); 339 - sbi->atomic_files--; 340 - } 341 - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); 342 - 343 - mutex_unlock(&fi->inmem_lock); 344 - break; 345 - } 346 - __revoke_inmem_pages(inode, &fi->inmem_pages, 347 - true, false, true); 348 - mutex_unlock(&fi->inmem_lock); 349 - } while (1); 350 - } 351 - 352 - void f2fs_drop_inmem_page(struct inode *inode, struct page *page) 353 - { 354 - struct f2fs_inode_info *fi = F2FS_I(inode); 355 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 356 - struct list_head *head = &fi->inmem_pages; 357 - struct inmem_pages *cur = NULL; 358 - struct inmem_pages *tmp; 359 - 360 - f2fs_bug_on(sbi, !page_private_atomic(page)); 361 - 362 - 
mutex_lock(&fi->inmem_lock); 363 - list_for_each_entry(tmp, head, list) { 364 - if (tmp->page == page) { 365 - cur = tmp; 366 - break; 367 - } 368 - } 369 - 370 - f2fs_bug_on(sbi, !cur); 371 - list_del(&cur->list); 372 - mutex_unlock(&fi->inmem_lock); 373 - 374 - dec_page_count(sbi, F2FS_INMEM_PAGES); 375 - kmem_cache_free(inmem_entry_slab, cur); 376 - 377 - ClearPageUptodate(page); 378 - clear_page_private_atomic(page); 379 - f2fs_put_page(page, 0); 380 - 381 - detach_page_private(page); 382 - set_page_private(page, 0); 383 - 384 - trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); 385 - } 386 - 387 - static int __f2fs_commit_inmem_pages(struct inode *inode) 388 - { 389 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 390 - struct f2fs_inode_info *fi = F2FS_I(inode); 391 - struct inmem_pages *cur, *tmp; 392 - struct f2fs_io_info fio = { 393 - .sbi = sbi, 394 - .ino = inode->i_ino, 395 - .type = DATA, 396 - .op = REQ_OP_WRITE, 397 - .op_flags = REQ_SYNC | REQ_PRIO, 398 - .io_type = FS_DATA_IO, 399 - }; 276 + struct inode *cow_inode = fi->cow_inode; 277 + struct revoke_entry *new; 400 278 struct list_head revoke_list; 401 - bool submit_bio = false; 402 - int err = 0; 279 + block_t blkaddr; 280 + struct dnode_of_data dn; 281 + pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 282 + pgoff_t off = 0, blen, index; 283 + int ret = 0, i; 403 284 404 285 INIT_LIST_HEAD(&revoke_list); 405 286 406 - list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 407 - struct page *page = cur->page; 287 + while (len) { 288 + blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len); 408 289 409 - lock_page(page); 410 - if (page->mapping == inode->i_mapping) { 411 - trace_f2fs_commit_inmem_page(page, INMEM); 412 - 413 - f2fs_wait_on_page_writeback(page, DATA, true, true); 414 - 415 - set_page_dirty(page); 416 - if (clear_page_dirty_for_io(page)) { 417 - inode_dec_dirty_pages(inode); 418 - f2fs_remove_dirty_inode(inode); 419 - } 420 - retry: 421 - fio.page = page; 422 - 
fio.old_blkaddr = NULL_ADDR; 423 - fio.encrypted_page = NULL; 424 - fio.need_lock = LOCK_DONE; 425 - err = f2fs_do_write_data_page(&fio); 426 - if (err) { 427 - if (err == -ENOMEM) { 428 - memalloc_retry_wait(GFP_NOFS); 429 - goto retry; 430 - } 431 - unlock_page(page); 432 - break; 433 - } 434 - /* record old blkaddr for revoking */ 435 - cur->old_addr = fio.old_blkaddr; 436 - submit_bio = true; 290 + set_new_dnode(&dn, cow_inode, NULL, NULL, 0); 291 + ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); 292 + if (ret && ret != -ENOENT) { 293 + goto out; 294 + } else if (ret == -ENOENT) { 295 + ret = 0; 296 + if (dn.max_level == 0) 297 + goto out; 298 + goto next; 437 299 } 438 - unlock_page(page); 439 - list_move_tail(&cur->list, &revoke_list); 300 + 301 + blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode), 302 + len); 303 + index = off; 304 + for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) { 305 + blkaddr = f2fs_data_blkaddr(&dn); 306 + 307 + if (!__is_valid_data_blkaddr(blkaddr)) { 308 + continue; 309 + } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 310 + DATA_GENERIC_ENHANCE)) { 311 + f2fs_put_dnode(&dn); 312 + ret = -EFSCORRUPTED; 313 + goto out; 314 + } 315 + 316 + new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS, 317 + true, NULL); 318 + if (!new) { 319 + f2fs_put_dnode(&dn); 320 + ret = -ENOMEM; 321 + goto out; 322 + } 323 + 324 + ret = __replace_atomic_write_block(inode, index, blkaddr, 325 + &new->old_addr, false); 326 + if (ret) { 327 + f2fs_put_dnode(&dn); 328 + kmem_cache_free(revoke_entry_slab, new); 329 + goto out; 330 + } 331 + 332 + f2fs_update_data_blkaddr(&dn, NULL_ADDR); 333 + new->index = index; 334 + list_add_tail(&new->list, &revoke_list); 335 + } 336 + f2fs_put_dnode(&dn); 337 + next: 338 + off += blen; 339 + len -= blen; 440 340 } 441 341 442 - if (submit_bio) 443 - f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA); 342 + out: 343 + __complete_revoke_list(inode, &revoke_list, ret ? 
true : false); 444 344 445 - if (err) { 446 - /* 447 - * try to revoke all committed pages, but still we could fail 448 - * due to no memory or other reason, if that happened, EAGAIN 449 - * will be returned, which means in such case, transaction is 450 - * already not integrity, caller should use journal to do the 451 - * recovery or rewrite & commit last transaction. For other 452 - * error number, revoking was done by filesystem itself. 453 - */ 454 - err = __revoke_inmem_pages(inode, &revoke_list, 455 - false, true, false); 456 - 457 - /* drop all uncommitted pages */ 458 - __revoke_inmem_pages(inode, &fi->inmem_pages, 459 - true, false, false); 460 - } else { 461 - __revoke_inmem_pages(inode, &revoke_list, 462 - false, false, false); 463 - } 464 - 465 - return err; 345 + return ret; 466 346 } 467 347 468 - int f2fs_commit_inmem_pages(struct inode *inode) 348 + int f2fs_commit_atomic_write(struct inode *inode) 469 349 { 470 350 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 471 351 struct f2fs_inode_info *fi = F2FS_I(inode); 472 352 int err; 473 353 474 - f2fs_balance_fs(sbi, true); 354 + err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 355 + if (err) 356 + return err; 475 357 476 358 f2fs_down_write(&fi->i_gc_rwsem[WRITE]); 477 - 478 359 f2fs_lock_op(sbi); 479 - set_inode_flag(inode, FI_ATOMIC_COMMIT); 480 360 481 - mutex_lock(&fi->inmem_lock); 482 - err = __f2fs_commit_inmem_pages(inode); 483 - mutex_unlock(&fi->inmem_lock); 484 - 485 - clear_inode_flag(inode, FI_ATOMIC_COMMIT); 361 + err = __f2fs_commit_atomic_write(inode); 486 362 487 363 f2fs_unlock_op(sbi); 488 364 f2fs_up_write(&fi->i_gc_rwsem[WRITE]); ··· 5236 5360 if (!sit_entry_set_slab) 5237 5361 goto destroy_discard_cmd; 5238 5362 5239 - inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry", 5240 - sizeof(struct inmem_pages)); 5241 - if (!inmem_entry_slab) 5363 + revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry", 5364 + sizeof(struct revoke_entry)); 5365 + 
if (!revoke_entry_slab) 5242 5366 goto destroy_sit_entry_set; 5243 5367 return 0; 5244 5368 ··· 5257 5381 kmem_cache_destroy(sit_entry_set_slab); 5258 5382 kmem_cache_destroy(discard_cmd_slab); 5259 5383 kmem_cache_destroy(discard_entry_slab); 5260 - kmem_cache_destroy(inmem_entry_slab); 5384 + kmem_cache_destroy(revoke_entry_slab); 5261 5385 }
+2 -2
fs/f2fs/segment.h
··· 225 225 226 226 #define MAX_SKIP_GC_COUNT 16 227 227 228 - struct inmem_pages { 228 + struct revoke_entry { 229 229 struct list_head list; 230 - struct page *page; 231 230 block_t old_addr; /* for revoking when fail to commit */ 231 + pgoff_t index; 232 232 }; 233 233 234 234 struct sit_info {
+1 -5
fs/f2fs/super.c
··· 1339 1339 spin_lock_init(&fi->i_size_lock); 1340 1340 INIT_LIST_HEAD(&fi->dirty_list); 1341 1341 INIT_LIST_HEAD(&fi->gdirty_list); 1342 - INIT_LIST_HEAD(&fi->inmem_ilist); 1343 - INIT_LIST_HEAD(&fi->inmem_pages); 1344 - mutex_init(&fi->inmem_lock); 1345 1342 init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); 1346 1343 init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); 1347 1344 init_f2fs_rwsem(&fi->i_xattr_sem); ··· 1379 1382 atomic_inc(&inode->i_count); 1380 1383 spin_unlock(&inode->i_lock); 1381 1384 1382 - /* some remained atomic pages should discarded */ 1383 1385 if (f2fs_is_atomic_file(inode)) 1384 - f2fs_drop_inmem_pages(inode); 1386 + f2fs_abort_atomic_write(inode, true); 1385 1387 1386 1388 /* should remain fi->extent_tree for writepage */ 1387 1389 f2fs_destroy_extent_node(inode);
-22
include/trace/events/f2fs.h
··· 15 15 TRACE_DEFINE_ENUM(DATA); 16 16 TRACE_DEFINE_ENUM(META); 17 17 TRACE_DEFINE_ENUM(META_FLUSH); 18 - TRACE_DEFINE_ENUM(INMEM); 19 - TRACE_DEFINE_ENUM(INMEM_DROP); 20 - TRACE_DEFINE_ENUM(INMEM_INVALIDATE); 21 - TRACE_DEFINE_ENUM(INMEM_REVOKE); 22 18 TRACE_DEFINE_ENUM(IPU); 23 19 TRACE_DEFINE_ENUM(OPU); 24 20 TRACE_DEFINE_ENUM(HOT); ··· 55 59 { DATA, "DATA" }, \ 56 60 { META, "META" }, \ 57 61 { META_FLUSH, "META_FLUSH" }, \ 58 - { INMEM, "INMEM" }, \ 59 - { INMEM_DROP, "INMEM_DROP" }, \ 60 - { INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \ 61 - { INMEM_REVOKE, "INMEM_REVOKE" }, \ 62 62 { IPU, "IN-PLACE" }, \ 63 63 { OPU, "OUT-OF-PLACE" }) 64 64 ··· 1275 1283 ); 1276 1284 1277 1285 DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, 1278 - 1279 - TP_PROTO(struct page *page, int type), 1280 - 1281 - TP_ARGS(page, type) 1282 - ); 1283 - 1284 - DEFINE_EVENT(f2fs__page, f2fs_register_inmem_page, 1285 - 1286 - TP_PROTO(struct page *page, int type), 1287 - 1288 - TP_ARGS(page, type) 1289 - ); 1290 - 1291 - DEFINE_EVENT(f2fs__page, f2fs_commit_inmem_page, 1292 1286 1293 1287 TP_PROTO(struct page *page, int type), 1294 1288