Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'f2fs-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
"This patch-set introduces a couple of new features such as large
sector size, FITRIM, and atomic/volatile writes.

Several patches enhance power-off recovery and checkpoint routines.

The fsck.f2fs starts to support fixing corrupted partitions with
recovery hints provided by this patch-set.

Summary:
- retain some recovery information for fsck.f2fs
- enhance checkpoint speed
- enhance flush command management
- bug fix for lseek
- tune in-place-update policies
- enhance roll-forward speed
- revisit all the roll-forward and fsync rules
- support large sector size
- support FITRIM
- support atomic and volatile writes

And several clean-ups and bug fixes are included"

* tag 'f2fs-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (42 commits)
f2fs: support volatile operations for transient data
f2fs: support atomic writes
f2fs: remove unused return value
f2fs: clean up f2fs_ioctl functions
f2fs: potential shift wrapping bug in f2fs_trim_fs()
f2fs: call f2fs_unlock_op after error was handled
f2fs: check the use of macros on block counts and addresses
f2fs: refactor flush_nat_entries to remove costly reorganizing ops
f2fs: introduce FITRIM in f2fs_ioctl
f2fs: introduce cp_control structure
f2fs: use more free segments until SSR is activated
f2fs: change the ipu_policy option to enable combinations
f2fs: fix to search whole dirty segmap when get_victim
f2fs: fix to clean previous mount option when remount_fs
f2fs: skip punching hole in special condition
f2fs: support large sector size
f2fs: fix to truncate blocks past EOF in ->setattr
f2fs: update i_size when __allocate_data_block
f2fs: use MAX_BIO_BLOCKS(sbi)
f2fs: remove redundant operation during roll-forward recovery
...

+1468 -811
+7
Documentation/ABI/testing/sysfs-fs-f2fs
··· 44 44 Controls the FS utilization condition for the in-place-update 45 45 policies. 46 46 47 + What: /sys/fs/f2fs/<disk>/min_fsync_blocks 48 + Date: September 2014 49 + Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> 50 + Description: 51 + Controls the dirty page count condition for the in-place-update 52 + policies. 53 + 47 54 What: /sys/fs/f2fs/<disk>/max_small_discards 48 55 Date: November 2013 49 56 Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+10 -3
Documentation/filesystems/f2fs.txt
··· 192 192 193 193 ipu_policy This parameter controls the policy of in-place 194 194 updates in f2fs. There are five policies: 195 - 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR, 196 - 2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL, 197 - 4: F2FS_IPU_DISABLE. 195 + 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, 196 + 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, 197 + 0x10: F2FS_IPU_FSYNC. 198 198 199 199 min_ipu_util This parameter controls the threshold to trigger 200 200 in-place-updates. The number indicates percentage 201 201 of the filesystem utilization, and used by 202 202 F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. 203 + 204 + min_fsync_blocks This parameter controls the threshold to trigger 205 + in-place-updates when F2FS_IPU_FSYNC mode is set. 206 + The number indicates the number of dirty pages 207 + when fsync needs to flush on its call path. If 208 + the number is less than this value, it triggers 209 + in-place-updates. 203 210 204 211 max_victim_search This parameter controls the number of trials to 205 212 find a victim segment when conducting SSR and
+62 -35
fs/f2fs/checkpoint.c
··· 72 72 return page; 73 73 } 74 74 75 - static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) 75 + struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) 76 + { 77 + bool readahead = false; 78 + struct page *page; 79 + 80 + page = find_get_page(META_MAPPING(sbi), index); 81 + if (!page || (page && !PageUptodate(page))) 82 + readahead = true; 83 + f2fs_put_page(page, 0); 84 + 85 + if (readahead) 86 + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); 87 + return get_meta_page(sbi, index); 88 + } 89 + 90 + static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) 76 91 { 77 92 switch (type) { 78 93 case META_NAT: ··· 97 82 case META_SSA: 98 83 case META_CP: 99 84 return 0; 85 + case META_POR: 86 + return MAX_BLKADDR(sbi); 100 87 default: 101 88 BUG(); 102 89 } ··· 107 90 /* 108 91 * Readahead CP/NAT/SIT/SSA pages 109 92 */ 110 - int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) 93 + int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type) 111 94 { 112 95 block_t prev_blk_addr = 0; 113 96 struct page *page; 114 - int blkno = start; 115 - int max_blks = get_max_meta_blks(sbi, type); 97 + block_t blkno = start; 98 + block_t max_blks = get_max_meta_blks(sbi, type); 116 99 117 100 struct f2fs_io_info fio = { 118 101 .type = META, ··· 142 125 break; 143 126 case META_SSA: 144 127 case META_CP: 145 - /* get ssa/cp block addr */ 128 + case META_POR: 129 + if (unlikely(blkno >= max_blks)) 130 + goto out; 131 + if (unlikely(blkno < SEG0_BLKADDR(sbi))) 132 + goto out; 146 133 blk_addr = blkno; 147 134 break; 148 135 default: ··· 172 151 static int f2fs_write_meta_page(struct page *page, 173 152 struct writeback_control *wbc) 174 153 { 175 - struct inode *inode = page->mapping->host; 176 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 154 + struct f2fs_sb_info *sbi = F2FS_P_SB(page); 177 155 178 156 trace_f2fs_writepage(page, META); 179 157 ··· 197 177 static int 
f2fs_write_meta_pages(struct address_space *mapping, 198 178 struct writeback_control *wbc) 199 179 { 200 - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 180 + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 201 181 long diff, written; 202 182 203 183 trace_f2fs_writepages(mapping->host, wbc, META); ··· 279 259 280 260 static int f2fs_set_meta_page_dirty(struct page *page) 281 261 { 282 - struct address_space *mapping = page->mapping; 283 - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 284 - 285 262 trace_f2fs_set_page_dirty(page, META); 286 263 287 264 SetPageUptodate(page); 288 265 if (!PageDirty(page)) { 289 266 __set_page_dirty_nobuffers(page); 290 - inc_page_count(sbi, F2FS_DIRTY_META); 267 + inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); 291 268 return 1; 292 269 } 293 270 return 0; ··· 395 378 void release_orphan_inode(struct f2fs_sb_info *sbi) 396 379 { 397 380 spin_lock(&sbi->ino_lock[ORPHAN_INO]); 398 - f2fs_bug_on(sbi->n_orphans == 0); 381 + f2fs_bug_on(sbi, sbi->n_orphans == 0); 399 382 sbi->n_orphans--; 400 383 spin_unlock(&sbi->ino_lock[ORPHAN_INO]); 401 384 } ··· 415 398 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 416 399 { 417 400 struct inode *inode = f2fs_iget(sbi->sb, ino); 418 - f2fs_bug_on(IS_ERR(inode)); 401 + f2fs_bug_on(sbi, IS_ERR(inode)); 419 402 clear_nlink(inode); 420 403 421 404 /* truncate all the data during iput */ ··· 476 459 list_for_each_entry(orphan, head, list) { 477 460 if (!page) { 478 461 page = find_get_page(META_MAPPING(sbi), start_blk++); 479 - f2fs_bug_on(!page); 462 + f2fs_bug_on(sbi, !page); 480 463 orphan_blk = 481 464 (struct f2fs_orphan_block *)page_address(page); 482 465 memset(orphan_blk, 0, sizeof(*orphan_blk)); ··· 636 619 637 620 static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) 638 621 { 639 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 622 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 640 623 641 624 if 
(is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) 642 625 return -EEXIST; ··· 648 631 return 0; 649 632 } 650 633 651 - void set_dirty_dir_page(struct inode *inode, struct page *page) 634 + void update_dirty_page(struct inode *inode, struct page *page) 652 635 { 653 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 636 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 654 637 struct dir_inode_entry *new; 655 638 int ret = 0; 656 639 657 - if (!S_ISDIR(inode->i_mode)) 640 + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) 658 641 return; 642 + 643 + if (!S_ISDIR(inode->i_mode)) { 644 + inode_inc_dirty_pages(inode); 645 + goto out; 646 + } 659 647 660 648 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 661 649 new->inode = inode; ··· 668 646 669 647 spin_lock(&sbi->dir_inode_lock); 670 648 ret = __add_dirty_inode(inode, new); 671 - inode_inc_dirty_dents(inode); 672 - SetPagePrivate(page); 649 + inode_inc_dirty_pages(inode); 673 650 spin_unlock(&sbi->dir_inode_lock); 674 651 675 652 if (ret) 676 653 kmem_cache_free(inode_entry_slab, new); 654 + out: 655 + SetPagePrivate(page); 677 656 } 678 657 679 658 void add_dirty_dir_inode(struct inode *inode) 680 659 { 681 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 660 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 682 661 struct dir_inode_entry *new = 683 662 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 684 663 int ret = 0; ··· 697 674 698 675 void remove_dirty_dir_inode(struct inode *inode) 699 676 { 700 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 677 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 701 678 struct dir_inode_entry *entry; 702 679 703 680 if (!S_ISDIR(inode->i_mode)) 704 681 return; 705 682 706 683 spin_lock(&sbi->dir_inode_lock); 707 - if (get_dirty_dents(inode) || 684 + if (get_dirty_pages(inode) || 708 685 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { 709 686 spin_unlock(&sbi->dir_inode_lock); 710 687 return; ··· 825 802 finish_wait(&sbi->cp_wait, &wait); 826 803 } 827 804 
828 - static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 805 + static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 829 806 { 830 807 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 831 808 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 832 - nid_t last_nid = 0; 809 + struct f2fs_nm_info *nm_i = NM_I(sbi); 810 + nid_t last_nid = nm_i->next_scan_nid; 833 811 block_t start_blk; 834 812 struct page *cp_page; 835 813 unsigned int data_sum_blocks, orphan_blocks; ··· 893 869 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 894 870 orphan_blocks); 895 871 896 - if (is_umount) { 872 + if (cpc->reason == CP_UMOUNT) { 897 873 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 898 874 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ 899 875 cp_payload_blks + data_sum_blocks + ··· 909 885 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 910 886 else 911 887 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 888 + 889 + if (sbi->need_fsck) 890 + set_ckpt_flags(ckpt, CP_FSCK_FLAG); 912 891 913 892 /* update SIT/NAT bitmap */ 914 893 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); ··· 947 920 948 921 write_data_summaries(sbi, start_blk); 949 922 start_blk += data_sum_blocks; 950 - if (is_umount) { 923 + if (cpc->reason == CP_UMOUNT) { 951 924 write_node_summaries(sbi, start_blk); 952 925 start_blk += NR_CURSEG_NODE_TYPE; 953 926 } ··· 987 960 /* 988 961 * We guarantee that this checkpoint procedure will not fail. 
989 962 */ 990 - void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 963 + void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 991 964 { 992 965 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 993 966 unsigned long long ckpt_ver; 994 967 995 - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); 968 + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); 996 969 997 970 mutex_lock(&sbi->cp_mutex); 998 971 999 - if (!sbi->s_dirty) 972 + if (!sbi->s_dirty && cpc->reason != CP_DISCARD) 1000 973 goto out; 1001 974 if (unlikely(f2fs_cp_error(sbi))) 1002 975 goto out; 1003 976 if (block_operations(sbi)) 1004 977 goto out; 1005 978 1006 - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); 979 + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); 1007 980 1008 981 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1009 982 f2fs_submit_merged_bio(sbi, NODE, WRITE); ··· 1019 992 1020 993 /* write cached NAT/SIT entries to NAT/SIT area */ 1021 994 flush_nat_entries(sbi); 1022 - flush_sit_entries(sbi); 995 + flush_sit_entries(sbi, cpc); 1023 996 1024 997 /* unlock all the fs_lock[] in do_checkpoint() */ 1025 - do_checkpoint(sbi, is_umount); 998 + do_checkpoint(sbi, cpc); 1026 999 1027 1000 unblock_operations(sbi); 1028 1001 stat_inc_cp_count(sbi->stat_info); 1029 1002 out: 1030 1003 mutex_unlock(&sbi->cp_mutex); 1031 - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 1004 + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); 1032 1005 } 1033 1006 1034 1007 void init_ino_entry_info(struct f2fs_sb_info *sbi)
+39 -30
fs/f2fs/data.c
··· 85 85 bio = bio_alloc(GFP_NOIO, npages); 86 86 87 87 bio->bi_bdev = sbi->sb->s_bdev; 88 - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); 89 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 90 90 bio->bi_private = sbi; 91 91 ··· 193 193 __submit_merged_bio(io); 194 194 alloc_new: 195 195 if (io->bio == NULL) { 196 - int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 196 + int bio_blocks = MAX_BIO_BLOCKS(sbi); 197 197 198 198 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); 199 199 io->fio = *fio; ··· 236 236 237 237 int reserve_new_block(struct dnode_of_data *dn) 238 238 { 239 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 239 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 240 240 241 241 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 242 242 return -EPERM; ··· 258 258 int err; 259 259 260 260 /* if inode_page exists, index should be zero */ 261 - f2fs_bug_on(!need_put && index); 261 + f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); 262 262 263 263 err = get_dnode_of_data(dn, index, ALLOC_NODE); 264 264 if (err) ··· 321 321 block_t start_blkaddr, end_blkaddr; 322 322 int need_update = true; 323 323 324 - f2fs_bug_on(blk_addr == NEW_ADDR); 324 + f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); 325 325 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 326 326 dn->ofs_in_node; 327 327 ··· 396 396 397 397 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 398 398 { 399 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 400 399 struct address_space *mapping = inode->i_mapping; 401 400 struct dnode_of_data dn; 402 401 struct page *page; ··· 428 429 return page; 429 430 } 430 431 431 - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 432 + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, 432 433 sync ? 
READ_SYNC : READA); 433 434 if (err) 434 435 return ERR_PTR(err); ··· 450 451 */ 451 452 struct page *get_lock_data_page(struct inode *inode, pgoff_t index) 452 453 { 453 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 454 454 struct address_space *mapping = inode->i_mapping; 455 455 struct dnode_of_data dn; 456 456 struct page *page; ··· 488 490 return page; 489 491 } 490 492 491 - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); 493 + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, 494 + dn.data_blkaddr, READ_SYNC); 492 495 if (err) 493 496 return ERR_PTR(err); 494 497 ··· 516 517 struct page *get_new_data_page(struct inode *inode, 517 518 struct page *ipage, pgoff_t index, bool new_i_size) 518 519 { 519 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 520 520 struct address_space *mapping = inode->i_mapping; 521 521 struct page *page; 522 522 struct dnode_of_data dn; ··· 539 541 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 540 542 SetPageUptodate(page); 541 543 } else { 542 - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 543 - READ_SYNC); 544 + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, 545 + dn.data_blkaddr, READ_SYNC); 544 546 if (err) 545 547 goto put_err; 546 548 ··· 571 573 572 574 static int __allocate_data_block(struct dnode_of_data *dn) 573 575 { 574 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 576 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 577 + struct f2fs_inode_info *fi = F2FS_I(dn->inode); 575 578 struct f2fs_summary sum; 576 579 block_t new_blkaddr; 577 580 struct node_info ni; 581 + pgoff_t fofs; 578 582 int type; 579 583 580 584 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) ··· 599 599 update_extent_cache(new_blkaddr, dn); 600 600 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); 601 601 602 + /* update i_size */ 603 + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 604 + dn->ofs_in_node; 605 + if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) 
606 + i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); 607 + 602 608 dn->data_blkaddr = new_blkaddr; 603 609 return 0; 604 610 } ··· 620 614 static int __get_data_block(struct inode *inode, sector_t iblock, 621 615 struct buffer_head *bh_result, int create, bool fiemap) 622 616 { 623 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 624 617 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 625 618 unsigned maxblocks = bh_result->b_size >> blkbits; 626 619 struct dnode_of_data dn; ··· 635 630 goto out; 636 631 637 632 if (create) { 638 - f2fs_balance_fs(sbi); 639 - f2fs_lock_op(sbi); 633 + f2fs_balance_fs(F2FS_I_SB(inode)); 634 + f2fs_lock_op(F2FS_I_SB(inode)); 640 635 } 641 636 642 637 /* When reading holes, we need its node page */ ··· 712 707 f2fs_put_dnode(&dn); 713 708 unlock_out: 714 709 if (create) 715 - f2fs_unlock_op(sbi); 710 + f2fs_unlock_op(F2FS_I_SB(inode)); 716 711 out: 717 712 trace_f2fs_get_data_block(inode, iblock, bh_result, err); 718 713 return err; ··· 809 804 struct writeback_control *wbc) 810 805 { 811 806 struct inode *inode = page->mapping->host; 812 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 807 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 813 808 loff_t i_size = i_size_read(inode); 814 809 const pgoff_t end_index = ((unsigned long long) i_size) 815 810 >> PAGE_CACHE_SHIFT; ··· 851 846 if (unlikely(f2fs_cp_error(sbi))) { 852 847 SetPageError(page); 853 848 unlock_page(page); 854 - return 0; 849 + goto out; 855 850 } 856 851 857 852 if (!wbc->for_reclaim) ··· 871 866 872 867 clear_cold_data(page); 873 868 out: 874 - inode_dec_dirty_dents(inode); 869 + inode_dec_dirty_pages(inode); 875 870 unlock_page(page); 876 871 if (need_balance_fs) 877 872 f2fs_balance_fs(sbi); ··· 897 892 struct writeback_control *wbc) 898 893 { 899 894 struct inode *inode = mapping->host; 900 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 895 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 901 896 bool locked = false; 902 897 int ret; 903 898 
long diff; ··· 909 904 return 0; 910 905 911 906 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && 912 - get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && 907 + get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && 913 908 available_free_memory(sbi, DIRTY_DENTS)) 914 909 goto skip_write; 915 910 ··· 931 926 return ret; 932 927 933 928 skip_write: 934 - wbc->pages_skipped += get_dirty_dents(inode); 929 + wbc->pages_skipped += get_dirty_pages(inode); 935 930 return 0; 936 931 } 937 932 ··· 950 945 struct page **pagep, void **fsdata) 951 946 { 952 947 struct inode *inode = mapping->host; 953 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 948 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 954 949 struct page *page; 955 950 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 956 951 struct dnode_of_data dn; ··· 1052 1047 1053 1048 trace_f2fs_write_end(inode, pos, len, copied); 1054 1049 1055 - set_page_dirty(page); 1050 + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) 1051 + register_inmem_page(inode, page); 1052 + else 1053 + set_page_dirty(page); 1056 1054 1057 1055 if (pos + copied > i_size_read(inode)) { 1058 1056 i_size_write(inode, pos + copied); ··· 1100 1092 if (check_direct_IO(inode, rw, iter, offset)) 1101 1093 return 0; 1102 1094 1103 - /* clear fsync mark to recover these blocks */ 1104 - fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1105 - 1106 1095 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1107 1096 1108 1097 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); ··· 1115 1110 unsigned int length) 1116 1111 { 1117 1112 struct inode *inode = page->mapping->host; 1113 + 1114 + if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) 1115 + return; 1116 + 1118 1117 if (PageDirty(page)) 1119 - inode_dec_dirty_dents(inode); 1118 + inode_dec_dirty_pages(inode); 1120 1119 ClearPagePrivate(page); 1121 1120 } 1122 1121 ··· 1142 1133 1143 1134 if (!PageDirty(page)) { 1144 
1135 __set_page_dirty_nobuffers(page); 1145 - set_dirty_dir_page(inode, page); 1136 + update_dirty_page(inode, page); 1146 1137 return 1; 1147 1138 } 1148 1139 return 0;
+10 -10
fs/f2fs/debug.c
··· 93 93 total_vblocks = 0; 94 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 95 95 hblks_per_sec = blks_per_sec / 2; 96 - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 97 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 98 98 dist = abs(vblocks - hblks_per_sec); 99 99 bimodal += dist * dist; ··· 103 103 ndirty++; 104 104 } 105 105 } 106 - dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 + dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 107 107 si->bimodal = bimodal / dist; 108 108 if (si->dirty_count) 109 109 si->avg_vblocks = total_vblocks / ndirty; ··· 131 131 132 132 /* build sit */ 133 133 si->base_mem += sizeof(struct sit_info); 134 - si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); 135 - si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 136 - si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); 134 + si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); 135 + si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); 136 + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); 137 137 if (sbi->segs_per_sec > 1) 138 - si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); 138 + si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); 139 139 si->base_mem += __bitmap_size(sbi, SIT_BITMAP); 140 140 141 141 /* build free segmap */ 142 142 si->base_mem += sizeof(struct free_segmap_info); 143 - si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 144 - si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 143 + si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); 144 + si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi)); 145 145 146 146 /* build curseg */ 147 147 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; ··· 149 149 150 150 /* build dirty segmap */ 151 151 si->base_mem += sizeof(struct dirty_seglist_info); 152 - si->base_mem += NR_DIRTY_TYPE * 
f2fs_bitmap_size(TOTAL_SEGS(sbi)); 153 - si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 152 + si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi)); 153 + si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi)); 154 154 155 155 /* build nm */ 156 156 si->base_mem += sizeof(struct f2fs_nm_info);
+8 -11
fs/f2fs/dir.c
··· 126 126 * For the most part, it should be a bug when name_len is zero. 127 127 * We stop here for figuring out where the bugs has occurred. 128 128 */ 129 - f2fs_bug_on(!de->name_len); 129 + f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); 130 130 131 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 132 132 } ··· 151 151 bool room = false; 152 152 int max_slots = 0; 153 153 154 - f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 154 + f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); 155 155 156 156 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); 157 157 nblock = bucket_blocks(level); ··· 284 284 285 285 int update_dent_inode(struct inode *inode, const struct qstr *name) 286 286 { 287 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 288 287 struct page *page; 289 288 290 - page = get_node_page(sbi, inode->i_ino); 289 + page = get_node_page(F2FS_I_SB(inode), inode->i_ino); 291 290 if (IS_ERR(page)) 292 291 return PTR_ERR(page); 293 292 ··· 336 337 static struct page *init_inode_metadata(struct inode *inode, 337 338 struct inode *dir, const struct qstr *name) 338 339 { 339 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 340 340 struct page *page; 341 341 int err; 342 342 ··· 358 360 if (err) 359 361 goto put_error; 360 362 } else { 361 - page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 363 + page = get_node_page(F2FS_I_SB(dir), inode->i_ino); 362 364 if (IS_ERR(page)) 363 365 return page; 364 366 ··· 379 381 * we should remove this inode from orphan list. 
380 382 */ 381 383 if (inode->i_nlink == 0) 382 - remove_orphan_inode(sbi, inode->i_ino); 384 + remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); 383 385 inc_nlink(inode); 384 386 } 385 387 return page; ··· 569 571 { 570 572 struct f2fs_dentry_block *dentry_blk; 571 573 unsigned int bit_pos; 572 - struct address_space *mapping = page->mapping; 573 - struct inode *dir = mapping->host; 574 + struct inode *dir = page->mapping->host; 574 575 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 575 576 int i; 576 577 ··· 591 594 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 592 595 593 596 if (inode) { 594 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 597 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 595 598 596 599 down_write(&F2FS_I(inode)->i_sem); 597 600 ··· 618 621 truncate_hole(dir, page->index, page->index + 1); 619 622 clear_page_dirty_for_io(page); 620 623 ClearPageUptodate(page); 621 - inode_dec_dirty_dents(dir); 624 + inode_dec_dirty_pages(dir); 622 625 } 623 626 f2fs_put_page(page, 1); 624 627 }
+121 -42
fs/f2fs/f2fs.h
··· 21 21 #include <linux/sched.h> 22 22 23 23 #ifdef CONFIG_F2FS_CHECK_FS 24 - #define f2fs_bug_on(condition) BUG_ON(condition) 24 + #define f2fs_bug_on(sbi, condition) BUG_ON(condition) 25 25 #define f2fs_down_write(x, y) down_write_nest_lock(x, y) 26 26 #else 27 - #define f2fs_bug_on(condition) WARN_ON(condition) 27 + #define f2fs_bug_on(sbi, condition) \ 28 + do { \ 29 + if (unlikely(condition)) { \ 30 + WARN_ON(1); \ 31 + sbi->need_fsck = true; \ 32 + } \ 33 + } while (0) 28 34 #define f2fs_down_write(x, y) down_write(x) 29 35 #endif 30 36 ··· 96 90 SIT_BITMAP 97 91 }; 98 92 93 + enum { 94 + CP_UMOUNT, 95 + CP_SYNC, 96 + CP_DISCARD, 97 + }; 98 + 99 + struct cp_control { 100 + int reason; 101 + __u64 trim_start; 102 + __u64 trim_end; 103 + __u64 trim_minlen; 104 + __u64 trimmed; 105 + }; 106 + 99 107 /* 100 108 * For CP/NAT/SIT/SSA readahead 101 109 */ ··· 117 97 META_CP, 118 98 META_NAT, 119 99 META_SIT, 120 - META_SSA 100 + META_SSA, 101 + META_POR, 121 102 }; 122 103 123 104 /* for the list of ino */ ··· 151 130 struct fsync_inode_entry { 152 131 struct list_head list; /* list head */ 153 132 struct inode *inode; /* vfs inode pointer */ 154 - block_t blkaddr; /* block address locating the last inode */ 133 + block_t blkaddr; /* block address locating the last fsync */ 134 + block_t last_dentry; /* block address locating the last dentry */ 135 + block_t last_inode; /* block address locating the last inode */ 155 136 }; 156 137 157 138 #define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) ··· 163 140 #define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) 164 141 #define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) 165 142 #define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) 143 + 144 + #define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum)) 145 + #define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum)) 166 146 167 147 static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) 168 148 { 
··· 181 155 return before; 182 156 } 183 157 158 + static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, 159 + int type) 160 + { 161 + if (type == NAT_JOURNAL) 162 + return size <= MAX_NAT_JENTRIES(sum); 163 + return size <= MAX_SIT_JENTRIES(sum); 164 + } 165 + 184 166 /* 185 167 * ioctl commands 186 168 */ 187 - #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS 188 - #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS 169 + #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS 170 + #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS 171 + 172 + #define F2FS_IOCTL_MAGIC 0xf5 173 + #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) 174 + #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) 175 + #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 189 176 190 177 #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 191 178 /* ··· 261 222 /* Use below internally in f2fs*/ 262 223 unsigned long flags; /* use to pass per-file flags */ 263 224 struct rw_semaphore i_sem; /* protect fi info */ 264 - atomic_t dirty_dents; /* # of dirty dentry pages */ 225 + atomic_t dirty_pages; /* # of dirty pages */ 265 226 f2fs_hash_t chash; /* hash value of given file name */ 266 227 unsigned int clevel; /* maximum level of given file name */ 267 228 nid_t i_xattr_nid; /* node id that contains xattrs */ 268 229 unsigned long long xattr_ver; /* cp version of xattr modification */ 269 230 struct extent_info ext; /* in-memory extent cache entry */ 270 231 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ 232 + 233 + struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 234 + struct mutex inmem_lock; /* lock for inmemory pages */ 271 235 }; 272 236 273 237 static inline void get_extent_info(struct extent_info *ext, ··· 302 260 303 261 /* NAT cache management */ 304 262 struct radix_tree_root nat_root;/* root of the nat entry cache */ 263 + struct radix_tree_root nat_set_root;/* root of the nat set cache */ 305 264 rwlock_t nat_tree_lock; /* protect 
nat_tree_lock */ 306 - unsigned int nat_cnt; /* the # of cached nat entries */ 307 265 struct list_head nat_entries; /* cached nat entry list (clean) */ 308 - struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 309 - struct list_head nat_entry_set; /* nat entry set list */ 266 + unsigned int nat_cnt; /* the # of cached nat entries */ 310 267 unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 311 268 312 269 /* free node ids management */ ··· 373 332 }; 374 333 375 334 struct flush_cmd { 376 - struct flush_cmd *next; 377 335 struct completion wait; 336 + struct llist_node llnode; 378 337 int ret; 379 338 }; 380 339 381 340 struct flush_cmd_control { 382 341 struct task_struct *f2fs_issue_flush; /* flush thread */ 383 342 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ 384 - struct flush_cmd *issue_list; /* list for command issue */ 385 - struct flush_cmd *dispatch_list; /* list for command dispatch */ 386 - spinlock_t issue_lock; /* for issue list lock */ 387 - struct flush_cmd *issue_tail; /* list tail of issue list */ 343 + struct llist_head issue_list; /* list for command issue */ 344 + struct llist_node *dispatch_list; /* list for command dispatch */ 388 345 }; 389 346 390 347 struct f2fs_sm_info { ··· 408 369 int nr_discards; /* # of discards in the list */ 409 370 int max_discards; /* max. 
discards to be issued */ 410 371 372 + struct list_head sit_entry_set; /* sit entry set list */ 373 + 411 374 unsigned int ipu_policy; /* in-place-update policy */ 412 375 unsigned int min_ipu_util; /* in-place-update threshold */ 376 + unsigned int min_fsync_blocks; /* threshold for fsync */ 413 377 414 378 /* for flush command control */ 415 379 struct flush_cmd_control *cmd_control_info; ··· 476 434 struct buffer_head *raw_super_buf; /* buffer head of raw sb */ 477 435 struct f2fs_super_block *raw_super; /* raw super block pointer */ 478 436 int s_dirty; /* dirty flag for checkpoint */ 437 + bool need_fsck; /* need fsck.f2fs to fix */ 479 438 480 439 /* for node-related operations */ 481 440 struct f2fs_nm_info *nm_info; /* node manager */ ··· 580 537 static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) 581 538 { 582 539 return sb->s_fs_info; 540 + } 541 + 542 + static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode) 543 + { 544 + return F2FS_SB(inode->i_sb); 545 + } 546 + 547 + static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) 548 + { 549 + return F2FS_I_SB(mapping->host); 550 + } 551 + 552 + static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) 553 + { 554 + return F2FS_M_SB(page->mapping); 583 555 } 584 556 585 557 static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) ··· 761 703 blkcnt_t count) 762 704 { 763 705 spin_lock(&sbi->stat_lock); 764 - f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); 765 - f2fs_bug_on(inode->i_blocks < count); 706 + f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); 707 + f2fs_bug_on(sbi, inode->i_blocks < count); 766 708 inode->i_blocks -= count; 767 709 sbi->total_valid_block_count -= (block_t)count; 768 710 spin_unlock(&sbi->stat_lock); ··· 774 716 F2FS_SET_SB_DIRT(sbi); 775 717 } 776 718 777 - static inline void inode_inc_dirty_dents(struct inode *inode) 719 + static inline void inode_inc_dirty_pages(struct inode 
*inode) 778 720 { 779 - inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); 780 - atomic_inc(&F2FS_I(inode)->dirty_dents); 721 + atomic_inc(&F2FS_I(inode)->dirty_pages); 722 + if (S_ISDIR(inode->i_mode)) 723 + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); 781 724 } 782 725 783 726 static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) ··· 786 727 atomic_dec(&sbi->nr_pages[count_type]); 787 728 } 788 729 789 - static inline void inode_dec_dirty_dents(struct inode *inode) 730 + static inline void inode_dec_dirty_pages(struct inode *inode) 790 731 { 791 - if (!S_ISDIR(inode->i_mode)) 732 + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) 792 733 return; 793 734 794 - dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); 795 - atomic_dec(&F2FS_I(inode)->dirty_dents); 735 + atomic_dec(&F2FS_I(inode)->dirty_pages); 736 + 737 + if (S_ISDIR(inode->i_mode)) 738 + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); 796 739 } 797 740 798 741 static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) ··· 802 741 return atomic_read(&sbi->nr_pages[count_type]); 803 742 } 804 743 805 - static inline int get_dirty_dents(struct inode *inode) 744 + static inline int get_dirty_pages(struct inode *inode) 806 745 { 807 - return atomic_read(&F2FS_I(inode)->dirty_dents); 746 + return atomic_read(&F2FS_I(inode)->dirty_pages); 808 747 } 809 748 810 749 static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) ··· 909 848 { 910 849 spin_lock(&sbi->stat_lock); 911 850 912 - f2fs_bug_on(!sbi->total_valid_block_count); 913 - f2fs_bug_on(!sbi->total_valid_node_count); 914 - f2fs_bug_on(!inode->i_blocks); 851 + f2fs_bug_on(sbi, !sbi->total_valid_block_count); 852 + f2fs_bug_on(sbi, !sbi->total_valid_node_count); 853 + f2fs_bug_on(sbi, !inode->i_blocks); 915 854 916 855 inode->i_blocks--; 917 856 sbi->total_valid_node_count--; ··· 928 867 static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 929 868 { 930 869 
spin_lock(&sbi->stat_lock); 931 - f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); 870 + f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count); 932 871 sbi->total_valid_inode_count++; 933 872 spin_unlock(&sbi->stat_lock); 934 873 } ··· 936 875 static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) 937 876 { 938 877 spin_lock(&sbi->stat_lock); 939 - f2fs_bug_on(!sbi->total_valid_inode_count); 878 + f2fs_bug_on(sbi, !sbi->total_valid_inode_count); 940 879 sbi->total_valid_inode_count--; 941 880 spin_unlock(&sbi->stat_lock); 942 881 } ··· 952 891 return; 953 892 954 893 if (unlock) { 955 - f2fs_bug_on(!PageLocked(page)); 894 + f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); 956 895 unlock_page(page); 957 896 } 958 897 page_cache_release(page); ··· 1059 998 FI_INLINE_DATA, /* used for inline data*/ 1060 999 FI_APPEND_WRITE, /* inode has appended data */ 1061 1000 FI_UPDATE_WRITE, /* inode has in-place-update data */ 1062 - FI_NEED_IPU, /* used fo ipu for fdatasync */ 1001 + FI_NEED_IPU, /* used for ipu per file */ 1002 + FI_ATOMIC_FILE, /* indicate atomic file */ 1003 + FI_VOLATILE_FILE, /* indicate volatile file */ 1063 1004 }; 1064 1005 1065 1006 static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) ··· 1148 1085 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); 1149 1086 } 1150 1087 1088 + static inline bool f2fs_is_atomic_file(struct inode *inode) 1089 + { 1090 + return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); 1091 + } 1092 + 1093 + static inline bool f2fs_is_volatile_file(struct inode *inode) 1094 + { 1095 + return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); 1096 + } 1097 + 1151 1098 static inline void *inline_data_addr(struct page *page) 1152 1099 { 1153 1100 struct f2fs_inode *ri = F2FS_INODE(page); ··· 1214 1141 void update_inode_page(struct inode *); 1215 1142 int f2fs_write_inode(struct inode *, struct writeback_control *); 1216 1143 void f2fs_evict_inode(struct inode 
*); 1144 + void handle_failed_inode(struct inode *); 1217 1145 1218 1146 /* 1219 1147 * namei.c ··· 1262 1188 struct node_info; 1263 1189 1264 1190 bool available_free_memory(struct f2fs_sb_info *, int); 1265 - int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1266 - bool fsync_mark_done(struct f2fs_sb_info *, nid_t); 1267 - void fsync_mark_clear(struct f2fs_sb_info *, nid_t); 1191 + bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1192 + bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); 1193 + bool need_inode_block_update(struct f2fs_sb_info *, nid_t); 1268 1194 void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1269 1195 int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1270 1196 int truncate_inode_blocks(struct inode *, pgoff_t); ··· 1295 1221 /* 1296 1222 * segment.c 1297 1223 */ 1224 + void register_inmem_page(struct inode *, struct page *); 1225 + void commit_inmem_pages(struct inode *, bool); 1298 1226 void f2fs_balance_fs(struct f2fs_sb_info *); 1299 1227 void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1300 1228 int f2fs_issue_flush(struct f2fs_sb_info *); ··· 1305 1229 void invalidate_blocks(struct f2fs_sb_info *, block_t); 1306 1230 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1307 1231 void clear_prefree_segments(struct f2fs_sb_info *); 1232 + void release_discard_addrs(struct f2fs_sb_info *); 1308 1233 void discard_next_dnode(struct f2fs_sb_info *, block_t); 1309 1234 int npages_for_summary_flush(struct f2fs_sb_info *); 1310 1235 void allocate_new_segments(struct f2fs_sb_info *); 1236 + int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); 1311 1237 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1312 1238 void write_meta_page(struct f2fs_sb_info *, struct page *); 1313 1239 void write_node_page(struct f2fs_sb_info *, struct page *, ··· 1326 1248 void write_node_summaries(struct f2fs_sb_info *, block_t); 1327 1249 int lookup_journal_in_cursum(struct 
f2fs_summary_block *, 1328 1250 int, unsigned int, int); 1329 - void flush_sit_entries(struct f2fs_sb_info *); 1251 + void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); 1330 1252 int build_segment_manager(struct f2fs_sb_info *); 1331 1253 void destroy_segment_manager(struct f2fs_sb_info *); 1332 1254 int __init create_segment_manager_caches(void); ··· 1337 1259 */ 1338 1260 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1339 1261 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1340 - int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1262 + struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); 1263 + int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); 1341 1264 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1342 1265 void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1343 1266 void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); ··· 1350 1271 void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1351 1272 void recover_orphan_inodes(struct f2fs_sb_info *); 1352 1273 int get_valid_checkpoint(struct f2fs_sb_info *); 1353 - void set_dirty_dir_page(struct inode *, struct page *); 1274 + void update_dirty_page(struct inode *, struct page *); 1354 1275 void add_dirty_dir_inode(struct inode *); 1355 1276 void remove_dirty_dir_inode(struct inode *); 1356 1277 void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1357 - void write_checkpoint(struct f2fs_sb_info *, bool); 1278 + void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); 1358 1279 void init_ino_entry_info(struct f2fs_sb_info *); 1359 1280 int __init create_checkpoint_caches(void); 1360 1281 void destroy_checkpoint_caches(void); ··· 1438 1359 #define stat_inc_inline_inode(inode) \ 1439 1360 do { \ 1440 1361 if (f2fs_has_inline_data(inode)) \ 1441 - ((F2FS_SB(inode->i_sb))->inline_inode++); \ 1362 + ((F2FS_I_SB(inode))->inline_inode++); \ 1442 1363 } while (0) 1443 1364 #define 
stat_dec_inline_inode(inode) \ 1444 1365 do { \ 1445 1366 if (f2fs_has_inline_data(inode)) \ 1446 - ((F2FS_SB(inode->i_sb))->inline_inode--); \ 1367 + ((F2FS_I_SB(inode))->inline_inode--); \ 1447 1368 } while (0) 1448 1369 1449 1370 #define stat_inc_seg_type(sbi, curseg) \
+199 -76
fs/f2fs/file.c
··· 33 33 { 34 34 struct page *page = vmf->page; 35 35 struct inode *inode = file_inode(vma->vm_file); 36 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 36 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 37 37 struct dnode_of_data dn; 38 38 int err; 39 39 ··· 117 117 118 118 static inline bool need_do_checkpoint(struct inode *inode) 119 119 { 120 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 120 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 121 121 bool need_cp = false; 122 122 123 123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) ··· 138 138 { 139 139 struct inode *inode = file->f_mapping->host; 140 140 struct f2fs_inode_info *fi = F2FS_I(inode); 141 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 141 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 142 + nid_t ino = inode->i_ino; 142 143 int ret = 0; 143 144 bool need_cp = false; 144 145 struct writeback_control wbc = { ··· 154 153 trace_f2fs_sync_file_enter(inode); 155 154 156 155 /* if fdatasync is triggered, let's do in-place-update */ 157 - if (datasync) 156 + if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 158 157 set_inode_flag(fi, FI_NEED_IPU); 159 - 160 158 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 161 - if (datasync) 162 - clear_inode_flag(fi, FI_NEED_IPU); 159 + clear_inode_flag(fi, FI_NEED_IPU); 160 + 163 161 if (ret) { 164 162 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 165 163 return ret; ··· 168 168 * if there is no written data, don't waste time to write recovery info. 
169 169 */ 170 170 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && 171 - !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { 171 + !exist_written_data(sbi, ino, APPEND_INO)) { 172 + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); 173 + 174 + /* But we need to avoid that there are some inode updates */ 175 + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { 176 + f2fs_put_page(i, 0); 177 + goto go_write; 178 + } 179 + f2fs_put_page(i, 0); 180 + 172 181 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || 173 - exist_written_data(sbi, inode->i_ino, UPDATE_INO)) 182 + exist_written_data(sbi, ino, UPDATE_INO)) 174 183 goto flush_out; 175 184 goto out; 176 185 } 177 - 186 + go_write: 178 187 /* guarantee free sections for fsync */ 179 188 f2fs_balance_fs(sbi); 180 189 ··· 216 207 up_write(&fi->i_sem); 217 208 } 218 209 } else { 219 - /* if there is no written node page, write its inode page */ 220 - while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 221 - if (fsync_mark_done(sbi, inode->i_ino)) 222 - goto out; 210 + sync_nodes: 211 + sync_node_pages(sbi, ino, &wbc); 212 + 213 + if (need_inode_block_update(sbi, ino)) { 223 214 mark_inode_dirty_sync(inode); 224 215 ret = f2fs_write_inode(inode, NULL); 225 216 if (ret) 226 217 goto out; 218 + goto sync_nodes; 227 219 } 228 - ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 220 + 221 + ret = wait_on_node_pages_writeback(sbi, ino); 229 222 if (ret) 230 223 goto out; 231 224 232 225 /* once recovery info is written, don't need to tack this */ 233 - remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); 226 + remove_dirty_inode(sbi, ino, APPEND_INO); 234 227 clear_inode_flag(fi, FI_APPEND_WRITE); 235 228 flush_out: 236 - remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 229 + remove_dirty_inode(sbi, ino, UPDATE_INO); 237 230 clear_inode_flag(fi, FI_UPDATE_WRITE); 238 - ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 231 + ret = f2fs_issue_flush(F2FS_I_SB(inode)); 239 232 } 240 233 out: 241 234 
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); ··· 364 353 maxbytes, i_size_read(inode)); 365 354 case SEEK_DATA: 366 355 case SEEK_HOLE: 356 + if (offset < 0) 357 + return -ENXIO; 367 358 return f2fs_seek_block(file, offset, whence); 368 359 } 369 360 ··· 382 369 int truncate_data_blocks_range(struct dnode_of_data *dn, int count) 383 370 { 384 371 int nr_free = 0, ofs = dn->ofs_in_node; 385 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 372 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 386 373 struct f2fs_node *raw_node; 387 374 __le32 *addr; 388 375 ··· 445 432 446 433 int truncate_blocks(struct inode *inode, u64 from, bool lock) 447 434 { 448 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 435 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 449 436 unsigned int blocksize = inode->i_sb->s_blocksize; 450 437 struct dnode_of_data dn; 451 438 pgoff_t free_from; ··· 476 463 count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 477 464 478 465 count -= dn.ofs_in_node; 479 - f2fs_bug_on(count < 0); 466 + f2fs_bug_on(sbi, count < 0); 480 467 481 468 if (dn.ofs_in_node || IS_INODE(dn.node_page)) { 482 469 truncate_data_blocks_range(&dn, count); ··· 560 547 if (err) 561 548 return err; 562 549 563 - if ((attr->ia_valid & ATTR_SIZE) && 564 - attr->ia_size != i_size_read(inode)) { 550 + if (attr->ia_valid & ATTR_SIZE) { 565 551 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); 566 552 if (err) 567 553 return err; 568 554 569 - truncate_setsize(inode, attr->ia_size); 570 - f2fs_truncate(inode); 571 - f2fs_balance_fs(F2FS_SB(inode->i_sb)); 555 + if (attr->ia_size != i_size_read(inode)) { 556 + truncate_setsize(inode, attr->ia_size); 557 + f2fs_truncate(inode); 558 + f2fs_balance_fs(F2FS_I_SB(inode)); 559 + } else { 560 + /* 561 + * giving a chance to truncate blocks past EOF which 562 + * are fallocated with FALLOC_FL_KEEP_SIZE. 
563 + */ 564 + f2fs_truncate(inode); 565 + } 572 566 } 573 567 574 568 __setattr_copy(inode, attr); ··· 609 589 static void fill_zero(struct inode *inode, pgoff_t index, 610 590 loff_t start, loff_t len) 611 591 { 612 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 592 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 613 593 struct page *page; 614 594 615 595 if (!len) ··· 658 638 loff_t off_start, off_end; 659 639 int ret = 0; 660 640 641 + if (!S_ISREG(inode->i_mode)) 642 + return -EOPNOTSUPP; 643 + 644 + /* skip punching hole beyond i_size */ 645 + if (offset >= inode->i_size) 646 + return ret; 647 + 661 648 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); 662 649 if (ret) 663 650 return ret; ··· 688 661 if (pg_start < pg_end) { 689 662 struct address_space *mapping = inode->i_mapping; 690 663 loff_t blk_start, blk_end; 691 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 664 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 692 665 693 666 f2fs_balance_fs(sbi); 694 667 ··· 709 682 static int expand_inode_data(struct inode *inode, loff_t offset, 710 683 loff_t len, int mode) 711 684 { 712 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 685 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 713 686 pgoff_t index, pg_start, pg_end; 714 687 loff_t new_size = i_size_read(inode); 715 688 loff_t off_start, off_end; ··· 805 778 return flags & F2FS_OTHER_FLMASK; 806 779 } 807 780 808 - long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 781 + static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) 809 782 { 810 783 struct inode *inode = file_inode(filp); 811 784 struct f2fs_inode_info *fi = F2FS_I(inode); 812 - unsigned int flags; 785 + unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; 786 + return put_user(flags, (int __user *)arg); 787 + } 788 + 789 + static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) 790 + { 791 + struct inode *inode = file_inode(filp); 792 + struct f2fs_inode_info *fi = 
F2FS_I(inode); 793 + unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; 794 + unsigned int oldflags; 813 795 int ret; 814 796 797 + ret = mnt_want_write_file(filp); 798 + if (ret) 799 + return ret; 800 + 801 + if (!inode_owner_or_capable(inode)) { 802 + ret = -EACCES; 803 + goto out; 804 + } 805 + 806 + if (get_user(flags, (int __user *)arg)) { 807 + ret = -EFAULT; 808 + goto out; 809 + } 810 + 811 + flags = f2fs_mask_flags(inode->i_mode, flags); 812 + 813 + mutex_lock(&inode->i_mutex); 814 + 815 + oldflags = fi->i_flags; 816 + 817 + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 818 + if (!capable(CAP_LINUX_IMMUTABLE)) { 819 + mutex_unlock(&inode->i_mutex); 820 + ret = -EPERM; 821 + goto out; 822 + } 823 + } 824 + 825 + flags = flags & FS_FL_USER_MODIFIABLE; 826 + flags |= oldflags & ~FS_FL_USER_MODIFIABLE; 827 + fi->i_flags = flags; 828 + mutex_unlock(&inode->i_mutex); 829 + 830 + f2fs_set_inode_flags(inode); 831 + inode->i_ctime = CURRENT_TIME; 832 + mark_inode_dirty(inode); 833 + out: 834 + mnt_drop_write_file(filp); 835 + return ret; 836 + } 837 + 838 + static int f2fs_ioc_start_atomic_write(struct file *filp) 839 + { 840 + struct inode *inode = file_inode(filp); 841 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 842 + 843 + if (!inode_owner_or_capable(inode)) 844 + return -EACCES; 845 + 846 + f2fs_balance_fs(sbi); 847 + 848 + set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 849 + 850 + return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); 851 + } 852 + 853 + static int f2fs_ioc_commit_atomic_write(struct file *filp) 854 + { 855 + struct inode *inode = file_inode(filp); 856 + int ret; 857 + 858 + if (!inode_owner_or_capable(inode)) 859 + return -EACCES; 860 + 861 + if (f2fs_is_volatile_file(inode)) 862 + return 0; 863 + 864 + ret = mnt_want_write_file(filp); 865 + if (ret) 866 + return ret; 867 + 868 + if (f2fs_is_atomic_file(inode)) 869 + commit_inmem_pages(inode, false); 870 + 871 + ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); 872 
+ mnt_drop_write_file(filp); 873 + return ret; 874 + } 875 + 876 + static int f2fs_ioc_start_volatile_write(struct file *filp) 877 + { 878 + struct inode *inode = file_inode(filp); 879 + 880 + if (!inode_owner_or_capable(inode)) 881 + return -EACCES; 882 + 883 + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 884 + return 0; 885 + } 886 + 887 + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 888 + { 889 + struct inode *inode = file_inode(filp); 890 + struct super_block *sb = inode->i_sb; 891 + struct request_queue *q = bdev_get_queue(sb->s_bdev); 892 + struct fstrim_range range; 893 + int ret; 894 + 895 + if (!capable(CAP_SYS_ADMIN)) 896 + return -EPERM; 897 + 898 + if (!blk_queue_discard(q)) 899 + return -EOPNOTSUPP; 900 + 901 + if (copy_from_user(&range, (struct fstrim_range __user *)arg, 902 + sizeof(range))) 903 + return -EFAULT; 904 + 905 + range.minlen = max((unsigned int)range.minlen, 906 + q->limits.discard_granularity); 907 + ret = f2fs_trim_fs(F2FS_SB(sb), &range); 908 + if (ret < 0) 909 + return ret; 910 + 911 + if (copy_to_user((struct fstrim_range __user *)arg, &range, 912 + sizeof(range))) 913 + return -EFAULT; 914 + return 0; 915 + } 916 + 917 + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 918 + { 815 919 switch (cmd) { 816 920 case F2FS_IOC_GETFLAGS: 817 - flags = fi->i_flags & FS_FL_USER_VISIBLE; 818 - return put_user(flags, (int __user *) arg); 921 + return f2fs_ioc_getflags(filp, arg); 819 922 case F2FS_IOC_SETFLAGS: 820 - { 821 - unsigned int oldflags; 822 - 823 - ret = mnt_want_write_file(filp); 824 - if (ret) 825 - return ret; 826 - 827 - if (!inode_owner_or_capable(inode)) { 828 - ret = -EACCES; 829 - goto out; 830 - } 831 - 832 - if (get_user(flags, (int __user *) arg)) { 833 - ret = -EFAULT; 834 - goto out; 835 - } 836 - 837 - flags = f2fs_mask_flags(inode->i_mode, flags); 838 - 839 - mutex_lock(&inode->i_mutex); 840 - 841 - oldflags = fi->i_flags; 842 - 843 - if ((flags ^ oldflags) & 
(FS_APPEND_FL | FS_IMMUTABLE_FL)) { 844 - if (!capable(CAP_LINUX_IMMUTABLE)) { 845 - mutex_unlock(&inode->i_mutex); 846 - ret = -EPERM; 847 - goto out; 848 - } 849 - } 850 - 851 - flags = flags & FS_FL_USER_MODIFIABLE; 852 - flags |= oldflags & ~FS_FL_USER_MODIFIABLE; 853 - fi->i_flags = flags; 854 - mutex_unlock(&inode->i_mutex); 855 - 856 - f2fs_set_inode_flags(inode); 857 - inode->i_ctime = CURRENT_TIME; 858 - mark_inode_dirty(inode); 859 - out: 860 - mnt_drop_write_file(filp); 861 - return ret; 862 - } 923 + return f2fs_ioc_setflags(filp, arg); 924 + case F2FS_IOC_START_ATOMIC_WRITE: 925 + return f2fs_ioc_start_atomic_write(filp); 926 + case F2FS_IOC_COMMIT_ATOMIC_WRITE: 927 + return f2fs_ioc_commit_atomic_write(filp); 928 + case F2FS_IOC_START_VOLATILE_WRITE: 929 + return f2fs_ioc_start_volatile_write(filp); 930 + case FITRIM: 931 + return f2fs_ioc_fitrim(filp, arg); 863 932 default: 864 933 return -ENOTTY; 865 934 }
+17 -9
fs/f2fs/gc.c
··· 193 193 * selected by background GC before. 194 194 * Those segments guarantee they have small valid blocks. 195 195 */ 196 - for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { 196 + for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { 197 197 if (sec_usage_check(sbi, secno)) 198 198 continue; 199 199 clear_bit(secno, dirty_i->victim_secmap); ··· 263 263 unsigned int secno, max_cost; 264 264 int nsearched = 0; 265 265 266 + mutex_lock(&dirty_i->seglist_lock); 267 + 266 268 p.alloc_mode = alloc_mode; 267 269 select_policy(sbi, gc_type, type, &p); 268 270 269 271 p.min_segno = NULL_SEGNO; 270 272 p.min_cost = max_cost = get_max_cost(sbi, &p); 271 - 272 - mutex_lock(&dirty_i->seglist_lock); 273 273 274 274 if (p.alloc_mode == LFS && gc_type == FG_GC) { 275 275 p.min_segno = check_bg_victims(sbi); ··· 281 281 unsigned long cost; 282 282 unsigned int segno; 283 283 284 - segno = find_next_bit(p.dirty_segmap, 285 - TOTAL_SEGS(sbi), p.offset); 286 - if (segno >= TOTAL_SEGS(sbi)) { 284 + segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset); 285 + if (segno >= MAIN_SEGS(sbi)) { 287 286 if (sbi->last_victim[p.gc_mode]) { 288 287 sbi->last_victim[p.gc_mode] = 0; 289 288 p.offset = 0; ··· 422 423 if (IS_ERR(node_page)) 423 424 continue; 424 425 426 + /* block may become invalid during get_node_page */ 427 + if (check_valid_map(sbi, segno, off) == 0) { 428 + f2fs_put_page(node_page, 1); 429 + continue; 430 + } 431 + 425 432 /* set page dirty and write it */ 426 433 if (gc_type == FG_GC) { 427 434 f2fs_wait_on_page_writeback(node_page, NODE); ··· 536 531 f2fs_wait_on_page_writeback(page, DATA); 537 532 538 533 if (clear_page_dirty_for_io(page)) 539 - inode_dec_dirty_dents(inode); 534 + inode_dec_dirty_pages(inode); 540 535 set_cold_data(page); 541 536 do_write_data_page(page, &fio); 542 537 clear_cold_data(page); ··· 693 688 int gc_type = BG_GC; 694 689 int nfree = 0; 695 690 int ret = -1; 691 + struct cp_control cpc = { 692 + .reason 
= CP_SYNC, 693 + }; 696 694 697 695 INIT_LIST_HEAD(&ilist); 698 696 gc_more: ··· 706 698 707 699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 708 700 gc_type = FG_GC; 709 - write_checkpoint(sbi, false); 701 + write_checkpoint(sbi, &cpc); 710 702 } 711 703 712 704 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) ··· 731 723 goto gc_more; 732 724 733 725 if (gc_type == FG_GC) 734 - write_checkpoint(sbi, false); 726 + write_checkpoint(sbi, &cpc); 735 727 stop: 736 728 mutex_unlock(&sbi->gc_mutex); 737 729
+10 -10
fs/f2fs/inline.c
··· 15 15 16 16 bool f2fs_may_inline(struct inode *inode) 17 17 { 18 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 19 18 block_t nr_blocks; 20 19 loff_t i_size; 21 20 22 - if (!test_opt(sbi, INLINE_DATA)) 21 + if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) 22 + return false; 23 + 24 + if (f2fs_is_atomic_file(inode)) 23 25 return false; 24 26 25 27 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; ··· 37 35 38 36 int f2fs_read_inline_data(struct inode *inode, struct page *page) 39 37 { 40 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 41 38 struct page *ipage; 42 39 void *src_addr, *dst_addr; 43 40 ··· 45 44 goto out; 46 45 } 47 46 48 - ipage = get_node_page(sbi, inode->i_ino); 47 + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 49 48 if (IS_ERR(ipage)) { 50 49 unlock_page(page); 51 50 return PTR_ERR(ipage); ··· 74 73 struct dnode_of_data dn; 75 74 void *src_addr, *dst_addr; 76 75 block_t new_blk_addr; 77 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 76 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 78 77 struct f2fs_io_info fio = { 79 78 .type = DATA, 80 79 .rw = WRITE_SYNC | REQ_PRIO, ··· 190 189 191 190 void truncate_inline_data(struct inode *inode, u64 from) 192 191 { 193 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 194 192 struct page *ipage; 195 193 196 194 if (from >= MAX_INLINE_DATA) 197 195 return; 198 196 199 - ipage = get_node_page(sbi, inode->i_ino); 197 + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 200 198 if (IS_ERR(ipage)) 201 199 return; 202 200 ··· 209 209 210 210 bool recover_inline_data(struct inode *inode, struct page *npage) 211 211 { 212 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 212 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 213 213 struct f2fs_inode *ri = NULL; 214 214 void *src_addr, *dst_addr; 215 215 struct page *ipage; ··· 229 229 ri && (ri->i_inline & F2FS_INLINE_DATA)) { 230 230 process_inline: 231 231 ipage = get_node_page(sbi, inode->i_ino); 232 - f2fs_bug_on(IS_ERR(ipage)); 232 + 
f2fs_bug_on(sbi, IS_ERR(ipage)); 233 233 234 234 f2fs_wait_on_page_writeback(ipage, NODE); 235 235 ··· 243 243 244 244 if (f2fs_has_inline_data(inode)) { 245 245 ipage = get_node_page(sbi, inode->i_ino); 246 - f2fs_bug_on(IS_ERR(ipage)); 246 + f2fs_bug_on(sbi, IS_ERR(ipage)); 247 247 f2fs_wait_on_page_writeback(ipage, NODE); 248 248 zero_user_segment(ipage, INLINE_DATA_OFFSET, 249 249 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+32 -5
fs/f2fs/inode.c
··· 69 69 70 70 static int do_read_inode(struct inode *inode) 71 71 { 72 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 72 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 73 73 struct f2fs_inode_info *fi = F2FS_I(inode); 74 74 struct page *node_page; 75 75 struct f2fs_inode *ri; ··· 218 218 219 219 void update_inode_page(struct inode *inode) 220 220 { 221 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 221 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 222 222 struct page *node_page; 223 223 retry: 224 224 node_page = get_node_page(sbi, inode->i_ino); ··· 238 238 239 239 int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 240 240 { 241 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 241 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 242 242 243 243 if (inode->i_ino == F2FS_NODE_INO(sbi) || 244 244 inode->i_ino == F2FS_META_INO(sbi)) ··· 266 266 */ 267 267 void f2fs_evict_inode(struct inode *inode) 268 268 { 269 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 270 270 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 271 + 272 + /* some remained atomic pages should discarded */ 273 + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) 274 + commit_inmem_pages(inode, true); 271 275 272 276 trace_f2fs_evict_inode(inode); 273 277 truncate_inode_pages_final(&inode->i_data); ··· 280 276 inode->i_ino == F2FS_META_INO(sbi)) 281 277 goto out_clear; 282 278 283 - f2fs_bug_on(get_dirty_dents(inode)); 279 + f2fs_bug_on(sbi, get_dirty_pages(inode)); 284 280 remove_dirty_dir_inode(inode); 285 281 286 282 if (inode->i_nlink || is_bad_inode(inode)) ··· 309 305 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 310 306 out_clear: 311 307 clear_inode(inode); 308 + } 309 + 310 + /* caller should call f2fs_lock_op() */ 311 + void handle_failed_inode(struct inode *inode) 312 + { 313 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 314 + 315 + clear_nlink(inode); 316 + make_bad_inode(inode); 317 + 
unlock_new_inode(inode); 318 + 319 + i_size_write(inode, 0); 320 + if (F2FS_HAS_BLOCKS(inode)) 321 + f2fs_truncate(inode); 322 + 323 + remove_inode_page(inode); 324 + stat_dec_inline_inode(inode); 325 + 326 + alloc_nid_failed(sbi, inode->i_ino); 327 + f2fs_unlock_op(sbi); 328 + 329 + /* iput will drop the inode object */ 330 + iput(inode); 312 331 }
+21 -32
fs/f2fs/namei.c
··· 23 23 24 24 static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 25 25 { 26 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 26 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 27 27 nid_t ino; 28 28 struct inode *inode; 29 29 bool nid_free = false; ··· 102 102 static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 103 103 bool excl) 104 104 { 105 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 105 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 106 106 struct inode *inode; 107 107 nid_t ino = 0; 108 108 int err; ··· 123 123 124 124 f2fs_lock_op(sbi); 125 125 err = f2fs_add_link(dentry, inode); 126 - f2fs_unlock_op(sbi); 127 126 if (err) 128 127 goto out; 128 + f2fs_unlock_op(sbi); 129 129 130 130 alloc_nid_done(sbi, ino); 131 131 ··· 133 133 unlock_new_inode(inode); 134 134 return 0; 135 135 out: 136 - clear_nlink(inode); 137 - iget_failed(inode); 138 - alloc_nid_failed(sbi, ino); 136 + handle_failed_inode(inode); 139 137 return err; 140 138 } 141 139 ··· 141 143 struct dentry *dentry) 142 144 { 143 145 struct inode *inode = old_dentry->d_inode; 144 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 146 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 145 147 int err; 146 148 147 149 f2fs_balance_fs(sbi); ··· 152 154 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 153 155 f2fs_lock_op(sbi); 154 156 err = f2fs_add_link(dentry, inode); 155 - f2fs_unlock_op(sbi); 156 157 if (err) 157 158 goto out; 159 + f2fs_unlock_op(sbi); 158 160 159 161 d_instantiate(dentry, inode); 160 162 return 0; 161 163 out: 162 164 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 163 165 iput(inode); 166 + f2fs_unlock_op(sbi); 164 167 return err; 165 168 } 166 169 ··· 202 203 203 204 static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 204 205 { 205 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 206 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 206 207 struct inode *inode = dentry->d_inode; 207 208 struct f2fs_dir_entry *de; 208 209 struct page *page; 
··· 236 237 static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 237 238 const char *symname) 238 239 { 239 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 240 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 240 241 struct inode *inode; 241 242 size_t symlen = strlen(symname) + 1; 242 243 int err; ··· 252 253 253 254 f2fs_lock_op(sbi); 254 255 err = f2fs_add_link(dentry, inode); 255 - f2fs_unlock_op(sbi); 256 256 if (err) 257 257 goto out; 258 + f2fs_unlock_op(sbi); 258 259 259 260 err = page_symlink(inode, symname, symlen); 260 261 alloc_nid_done(sbi, inode->i_ino); ··· 263 264 unlock_new_inode(inode); 264 265 return err; 265 266 out: 266 - clear_nlink(inode); 267 - iget_failed(inode); 268 - alloc_nid_failed(sbi, inode->i_ino); 267 + handle_failed_inode(inode); 269 268 return err; 270 269 } 271 270 272 271 static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 273 272 { 274 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 273 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 275 274 struct inode *inode; 276 275 int err; 277 276 ··· 287 290 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 288 291 f2fs_lock_op(sbi); 289 292 err = f2fs_add_link(dentry, inode); 290 - f2fs_unlock_op(sbi); 291 293 if (err) 292 294 goto out_fail; 295 + f2fs_unlock_op(sbi); 293 296 294 297 alloc_nid_done(sbi, inode->i_ino); 295 298 ··· 300 303 301 304 out_fail: 302 305 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 303 - clear_nlink(inode); 304 - iget_failed(inode); 305 - alloc_nid_failed(sbi, inode->i_ino); 306 + handle_failed_inode(inode); 306 307 return err; 307 308 } 308 309 ··· 315 320 static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 316 321 umode_t mode, dev_t rdev) 317 322 { 318 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 323 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 319 324 struct inode *inode; 320 325 int err = 0; 321 326 ··· 333 338 334 339 f2fs_lock_op(sbi); 335 340 err = f2fs_add_link(dentry, inode); 336 - f2fs_unlock_op(sbi); 337 
341 if (err) 338 342 goto out; 343 + f2fs_unlock_op(sbi); 339 344 340 345 alloc_nid_done(sbi, inode->i_ino); 341 346 d_instantiate(dentry, inode); 342 347 unlock_new_inode(inode); 343 348 return 0; 344 349 out: 345 - clear_nlink(inode); 346 - iget_failed(inode); 347 - alloc_nid_failed(sbi, inode->i_ino); 350 + handle_failed_inode(inode); 348 351 return err; 349 352 } 350 353 351 354 static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 352 355 struct inode *new_dir, struct dentry *new_dentry) 353 356 { 354 - struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); 357 + struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); 355 358 struct inode *old_inode = old_dentry->d_inode; 356 359 struct inode *new_inode = new_dentry->d_inode; 357 360 struct page *old_dir_page; ··· 473 480 static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, 474 481 struct inode *new_dir, struct dentry *new_dentry) 475 482 { 476 - struct super_block *sb = old_dir->i_sb; 477 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 483 + struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); 478 484 struct inode *old_inode = old_dentry->d_inode; 479 485 struct inode *new_inode = new_dentry->d_inode; 480 486 struct page *old_dir_page, *new_dir_page; ··· 634 642 635 643 static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 636 644 { 637 - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 645 + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 638 646 struct inode *inode; 639 647 int err; 640 648 ··· 670 678 release_out: 671 679 release_orphan_inode(sbi); 672 680 out: 673 - f2fs_unlock_op(sbi); 674 - clear_nlink(inode); 675 - iget_failed(inode); 676 - alloc_nid_failed(sbi, inode->i_ino); 681 + handle_failed_inode(inode); 677 682 return err; 678 683 } 679 684
+234 -230
fs/f2fs/node.c
··· 54 54 static void clear_node_page_dirty(struct page *page) 55 55 { 56 56 struct address_space *mapping = page->mapping; 57 - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 58 57 unsigned int long flags; 59 58 60 59 if (PageDirty(page)) { ··· 64 65 spin_unlock_irqrestore(&mapping->tree_lock, flags); 65 66 66 67 clear_page_dirty_for_io(page); 67 - dec_page_count(sbi, F2FS_DIRTY_NODES); 68 + dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); 68 69 } 69 70 ClearPageUptodate(page); 70 71 } ··· 91 92 /* get current nat block page with lock */ 92 93 src_page = get_meta_page(sbi, src_off); 93 94 dst_page = grab_meta_page(sbi, dst_off); 94 - f2fs_bug_on(PageDirty(src_page)); 95 + f2fs_bug_on(sbi, PageDirty(src_page)); 95 96 96 97 src_addr = page_address(src_page); 97 98 dst_addr = page_address(dst_page); ··· 123 124 kmem_cache_free(nat_entry_slab, e); 124 125 } 125 126 126 - int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 127 + static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, 128 + struct nat_entry *ne) 129 + { 130 + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 131 + struct nat_entry_set *head; 132 + 133 + if (get_nat_flag(ne, IS_DIRTY)) 134 + return; 135 + retry: 136 + head = radix_tree_lookup(&nm_i->nat_set_root, set); 137 + if (!head) { 138 + head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); 139 + 140 + INIT_LIST_HEAD(&head->entry_list); 141 + INIT_LIST_HEAD(&head->set_list); 142 + head->set = set; 143 + head->entry_cnt = 0; 144 + 145 + if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { 146 + cond_resched(); 147 + goto retry; 148 + } 149 + } 150 + list_move_tail(&ne->list, &head->entry_list); 151 + nm_i->dirty_nat_cnt++; 152 + head->entry_cnt++; 153 + set_nat_flag(ne, IS_DIRTY, true); 154 + } 155 + 156 + static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, 157 + struct nat_entry *ne) 158 + { 159 + nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; 160 + struct nat_entry_set *head; 161 + 162 + head = 
radix_tree_lookup(&nm_i->nat_set_root, set); 163 + if (head) { 164 + list_move_tail(&ne->list, &nm_i->nat_entries); 165 + set_nat_flag(ne, IS_DIRTY, false); 166 + head->entry_cnt--; 167 + nm_i->dirty_nat_cnt--; 168 + } 169 + } 170 + 171 + static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, 172 + nid_t start, unsigned int nr, struct nat_entry_set **ep) 173 + { 174 + return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, 175 + start, nr); 176 + } 177 + 178 + bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 127 179 { 128 180 struct f2fs_nm_info *nm_i = NM_I(sbi); 129 181 struct nat_entry *e; 130 - int is_cp = 1; 182 + bool is_cp = true; 131 183 132 184 read_lock(&nm_i->nat_tree_lock); 133 185 e = __lookup_nat_cache(nm_i, nid); 134 - if (e && !e->checkpointed) 135 - is_cp = 0; 186 + if (e && !get_nat_flag(e, IS_CHECKPOINTED)) 187 + is_cp = false; 136 188 read_unlock(&nm_i->nat_tree_lock); 137 189 return is_cp; 138 190 } 139 191 140 - bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) 192 + bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) 141 193 { 142 194 struct f2fs_nm_info *nm_i = NM_I(sbi); 143 195 struct nat_entry *e; 144 - bool fsync_done = false; 196 + bool fsynced = false; 145 197 146 198 read_lock(&nm_i->nat_tree_lock); 147 - e = __lookup_nat_cache(nm_i, nid); 148 - if (e) 149 - fsync_done = e->fsync_done; 199 + e = __lookup_nat_cache(nm_i, ino); 200 + if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) 201 + fsynced = true; 150 202 read_unlock(&nm_i->nat_tree_lock); 151 - return fsync_done; 203 + return fsynced; 152 204 } 153 205 154 - void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) 206 + bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) 155 207 { 156 208 struct f2fs_nm_info *nm_i = NM_I(sbi); 157 209 struct nat_entry *e; 210 + bool need_update = true; 158 211 159 - write_lock(&nm_i->nat_tree_lock); 160 - e = __lookup_nat_cache(nm_i, nid); 161 - if (e) 162 - e->fsync_done = false; 
163 - write_unlock(&nm_i->nat_tree_lock); 212 + read_lock(&nm_i->nat_tree_lock); 213 + e = __lookup_nat_cache(nm_i, ino); 214 + if (e && get_nat_flag(e, HAS_LAST_FSYNC) && 215 + (get_nat_flag(e, IS_CHECKPOINTED) || 216 + get_nat_flag(e, HAS_FSYNCED_INODE))) 217 + need_update = false; 218 + read_unlock(&nm_i->nat_tree_lock); 219 + return need_update; 164 220 } 165 221 166 222 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) ··· 231 177 } 232 178 memset(new, 0, sizeof(struct nat_entry)); 233 179 nat_set_nid(new, nid); 234 - new->checkpointed = true; 180 + nat_reset_flag(new); 235 181 list_add_tail(&new->list, &nm_i->nat_entries); 236 182 nm_i->nat_cnt++; 237 183 return new; ··· 270 216 goto retry; 271 217 } 272 218 e->ni = *ni; 273 - f2fs_bug_on(ni->blk_addr == NEW_ADDR); 219 + f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 274 220 } else if (new_blkaddr == NEW_ADDR) { 275 221 /* 276 222 * when nid is reallocated, ··· 278 224 * So, reinitialize it with new information. 
279 225 */ 280 226 e->ni = *ni; 281 - f2fs_bug_on(ni->blk_addr != NULL_ADDR); 227 + f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); 282 228 } 283 229 284 230 /* sanity check */ 285 - f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 286 - f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 231 + f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); 232 + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && 287 233 new_blkaddr == NULL_ADDR); 288 - f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && 234 + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && 289 235 new_blkaddr == NEW_ADDR); 290 - f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && 236 + f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && 291 237 nat_get_blkaddr(e) != NULL_ADDR && 292 238 new_blkaddr == NEW_ADDR); 293 239 ··· 299 245 300 246 /* change address */ 301 247 nat_set_blkaddr(e, new_blkaddr); 248 + if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) 249 + set_nat_flag(e, IS_CHECKPOINTED, false); 302 250 __set_nat_cache_dirty(nm_i, e); 303 251 304 252 /* update fsync_mark if its inode nat entry is still alive */ 305 253 e = __lookup_nat_cache(nm_i, ni->ino); 306 - if (e) 307 - e->fsync_done = fsync_done; 254 + if (e) { 255 + if (fsync_done && ni->nid == ni->ino) 256 + set_nat_flag(e, HAS_FSYNCED_INODE, true); 257 + set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); 258 + } 308 259 write_unlock(&nm_i->nat_tree_lock); 309 260 } 310 261 ··· 470 411 */ 471 412 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 472 413 { 473 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 414 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 474 415 struct page *npage[4]; 475 416 struct page *parent; 476 417 int offset[4]; ··· 563 504 564 505 static void truncate_node(struct dnode_of_data *dn) 565 506 { 566 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 507 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 567 508 struct node_info ni; 568 509 569 510 get_node_info(sbi, dn->nid, &ni); 570 511 if 
(dn->inode->i_blocks == 0) { 571 - f2fs_bug_on(ni.blk_addr != NULL_ADDR); 512 + f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); 572 513 goto invalidate; 573 514 } 574 - f2fs_bug_on(ni.blk_addr == NULL_ADDR); 515 + f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 575 516 576 517 /* Deallocate node address */ 577 518 invalidate_blocks(sbi, ni.blk_addr); ··· 599 540 600 541 static int truncate_dnode(struct dnode_of_data *dn) 601 542 { 602 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 603 543 struct page *page; 604 544 605 545 if (dn->nid == 0) 606 546 return 1; 607 547 608 548 /* get direct node */ 609 - page = get_node_page(sbi, dn->nid); 549 + page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); 610 550 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) 611 551 return 1; 612 552 else if (IS_ERR(page)) ··· 622 564 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, 623 565 int ofs, int depth) 624 566 { 625 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 626 567 struct dnode_of_data rdn = *dn; 627 568 struct page *page; 628 569 struct f2fs_node *rn; ··· 635 578 636 579 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); 637 580 638 - page = get_node_page(sbi, dn->nid); 581 + page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); 639 582 if (IS_ERR(page)) { 640 583 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); 641 584 return PTR_ERR(page); ··· 693 636 static int truncate_partial_nodes(struct dnode_of_data *dn, 694 637 struct f2fs_inode *ri, int *offset, int depth) 695 638 { 696 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 697 639 struct page *pages[2]; 698 640 nid_t nid[3]; 699 641 nid_t child_nid; ··· 707 651 /* get indirect nodes in the path */ 708 652 for (i = 0; i < idx + 1; i++) { 709 653 /* reference count'll be increased */ 710 - pages[i] = get_node_page(sbi, nid[i]); 654 + pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); 711 655 if (IS_ERR(pages[i])) { 712 656 err = PTR_ERR(pages[i]); 713 657 
idx = i - 1; ··· 752 696 */ 753 697 int truncate_inode_blocks(struct inode *inode, pgoff_t from) 754 698 { 755 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 699 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 756 700 int err = 0, cont = 1; 757 701 int level, offset[4], noffset[4]; 758 702 unsigned int nofs = 0; ··· 848 792 849 793 int truncate_xattr_node(struct inode *inode, struct page *page) 850 794 { 851 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 795 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 852 796 nid_t nid = F2FS_I(inode)->i_xattr_nid; 853 797 struct dnode_of_data dn; 854 798 struct page *npage; ··· 896 840 truncate_data_blocks_range(&dn, 1); 897 841 898 842 /* 0 is possible, after f2fs_new_inode() has failed */ 899 - f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); 843 + f2fs_bug_on(F2FS_I_SB(inode), 844 + inode->i_blocks != 0 && inode->i_blocks != 1); 900 845 901 846 /* will put inode & node pages */ 902 847 truncate_node(&dn); ··· 917 860 struct page *new_node_page(struct dnode_of_data *dn, 918 861 unsigned int ofs, struct page *ipage) 919 862 { 920 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 863 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 921 864 struct node_info old_ni, new_ni; 922 865 struct page *page; 923 866 int err; ··· 937 880 get_node_info(sbi, dn->nid, &old_ni); 938 881 939 882 /* Reinitialize old_ni with new node page */ 940 - f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 883 + f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); 941 884 new_ni = old_ni; 942 885 new_ni.ino = dn->inode->i_ino; 943 886 set_node_addr(sbi, &new_ni, NEW_ADDR, false); ··· 975 918 */ 976 919 static int read_node_page(struct page *page, int rw) 977 920 { 978 - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 921 + struct f2fs_sb_info *sbi = F2FS_P_SB(page); 979 922 struct node_info ni; 980 923 981 924 get_node_info(sbi, page->index, &ni); ··· 1051 994 */ 1052 995 struct page *get_node_page_ra(struct page *parent, int 
start) 1053 996 { 1054 - struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); 997 + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 1055 998 struct blk_plug plug; 1056 999 struct page *page; 1057 1000 int err, i, end; ··· 1181 1124 1182 1125 /* called by fsync() */ 1183 1126 if (ino && IS_DNODE(page)) { 1184 - int mark = !is_checkpointed_node(sbi, ino); 1185 1127 set_fsync_mark(page, 1); 1186 - if (IS_INODE(page)) 1187 - set_dentry_mark(page, mark); 1128 + if (IS_INODE(page)) { 1129 + if (!is_checkpointed_node(sbi, ino) && 1130 + !has_fsynced_inode(sbi, ino)) 1131 + set_dentry_mark(page, 1); 1132 + else 1133 + set_dentry_mark(page, 0); 1134 + } 1188 1135 nwritten++; 1189 1136 } else { 1190 1137 set_fsync_mark(page, 0); ··· 1267 1206 static int f2fs_write_node_page(struct page *page, 1268 1207 struct writeback_control *wbc) 1269 1208 { 1270 - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1209 + struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1271 1210 nid_t nid; 1272 1211 block_t new_addr; 1273 1212 struct node_info ni; ··· 1287 1226 1288 1227 /* get old block addr of this node page */ 1289 1228 nid = nid_of_node(page); 1290 - f2fs_bug_on(page->index != nid); 1229 + f2fs_bug_on(sbi, page->index != nid); 1291 1230 1292 1231 get_node_info(sbi, nid, &ni); 1293 1232 ··· 1318 1257 static int f2fs_write_node_pages(struct address_space *mapping, 1319 1258 struct writeback_control *wbc) 1320 1259 { 1321 - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1260 + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 1322 1261 long diff; 1323 1262 1324 1263 trace_f2fs_writepages(mapping->host, wbc, NODE); ··· 1343 1282 1344 1283 static int f2fs_set_node_page_dirty(struct page *page) 1345 1284 { 1346 - struct address_space *mapping = page->mapping; 1347 - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1348 - 1349 1285 trace_f2fs_set_page_dirty(page, NODE); 1350 1286 1351 1287 SetPageUptodate(page); 1352 1288 if (!PageDirty(page)) { 1353 1289 
__set_page_dirty_nobuffers(page); 1354 - inc_page_count(sbi, F2FS_DIRTY_NODES); 1290 + inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); 1355 1291 SetPagePrivate(page); 1356 1292 return 1; 1357 1293 } ··· 1359 1301 unsigned int length) 1360 1302 { 1361 1303 struct inode *inode = page->mapping->host; 1362 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1363 1304 if (PageDirty(page)) 1364 - dec_page_count(sbi, F2FS_DIRTY_NODES); 1305 + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); 1365 1306 ClearPagePrivate(page); 1366 1307 } 1367 1308 ··· 1413 1356 read_lock(&nm_i->nat_tree_lock); 1414 1357 ne = __lookup_nat_cache(nm_i, nid); 1415 1358 if (ne && 1416 - (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) 1359 + (!get_nat_flag(ne, IS_CHECKPOINTED) || 1360 + nat_get_blkaddr(ne) != NULL_ADDR)) 1417 1361 allocated = true; 1418 1362 read_unlock(&nm_i->nat_tree_lock); 1419 1363 if (allocated) ··· 1471 1413 break; 1472 1414 1473 1415 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1474 - f2fs_bug_on(blk_addr == NEW_ADDR); 1416 + f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1475 1417 if (blk_addr == NULL_ADDR) { 1476 1418 if (add_free_nid(sbi, start_nid, true) < 0) 1477 1419 break; ··· 1541 1483 1542 1484 /* We should not use stale free nids created by build_free_nids */ 1543 1485 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1544 - f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1486 + f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1545 1487 list_for_each_entry(i, &nm_i->free_nid_list, list) 1546 1488 if (i->state == NID_NEW) 1547 1489 break; 1548 1490 1549 - f2fs_bug_on(i->state != NID_NEW); 1491 + f2fs_bug_on(sbi, i->state != NID_NEW); 1550 1492 *nid = i->nid; 1551 1493 i->state = NID_ALLOC; 1552 1494 nm_i->fcnt--; ··· 1572 1514 1573 1515 spin_lock(&nm_i->free_nid_list_lock); 1574 1516 i = __lookup_free_nid_list(nm_i, nid); 1575 - f2fs_bug_on(!i || i->state != NID_ALLOC); 1517 + f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1576 1518 
__del_from_free_nid_list(nm_i, i); 1577 1519 spin_unlock(&nm_i->free_nid_list_lock); 1578 1520 ··· 1593 1535 1594 1536 spin_lock(&nm_i->free_nid_list_lock); 1595 1537 i = __lookup_free_nid_list(nm_i, nid); 1596 - f2fs_bug_on(!i || i->state != NID_ALLOC); 1538 + f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1597 1539 if (!available_free_memory(sbi, FREE_NIDS)) { 1598 1540 __del_from_free_nid_list(nm_i, i); 1599 1541 need_free = true; ··· 1609 1551 1610 1552 void recover_inline_xattr(struct inode *inode, struct page *page) 1611 1553 { 1612 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1613 1554 void *src_addr, *dst_addr; 1614 1555 size_t inline_size; 1615 1556 struct page *ipage; 1616 1557 struct f2fs_inode *ri; 1617 1558 1618 - ipage = get_node_page(sbi, inode->i_ino); 1619 - f2fs_bug_on(IS_ERR(ipage)); 1559 + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 1560 + f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); 1620 1561 1621 1562 ri = F2FS_INODE(page); 1622 1563 if (!(ri->i_inline & F2FS_INLINE_XATTR)) { ··· 1636 1579 1637 1580 void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1638 1581 { 1639 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1582 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1640 1583 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1641 1584 nid_t new_xnid = nid_of_node(page); 1642 1585 struct node_info ni; ··· 1647 1590 1648 1591 /* Deallocate node address */ 1649 1592 get_node_info(sbi, prev_xnid, &ni); 1650 - f2fs_bug_on(ni.blk_addr == NULL_ADDR); 1593 + f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 1651 1594 invalidate_blocks(sbi, ni.blk_addr); 1652 1595 dec_valid_node_count(sbi, inode); 1653 1596 set_node_addr(sbi, &ni, NULL_ADDR, false); ··· 1655 1598 recover_xnid: 1656 1599 /* 2: allocate new xattr nid */ 1657 1600 if (unlikely(!inc_valid_node_count(sbi, inode))) 1658 - f2fs_bug_on(1); 1601 + f2fs_bug_on(sbi, 1); 1659 1602 1660 1603 remove_free_nid(NM_I(sbi), new_xnid); 1661 1604 get_node_info(sbi, 
new_xnid, &ni); ··· 1748 1691 struct f2fs_summary *sum_entry; 1749 1692 struct inode *inode = sbi->sb->s_bdev->bd_inode; 1750 1693 block_t addr; 1751 - int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1694 + int bio_blocks = MAX_BIO_BLOCKS(sbi); 1752 1695 struct page *pages[bio_blocks]; 1753 1696 int i, idx, last_offset, nrpages, err = 0; 1754 1697 ··· 1790 1733 return err; 1791 1734 } 1792 1735 1793 - static struct nat_entry_set *grab_nat_entry_set(void) 1794 - { 1795 - struct nat_entry_set *nes = 1796 - f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); 1797 - 1798 - nes->entry_cnt = 0; 1799 - INIT_LIST_HEAD(&nes->set_list); 1800 - INIT_LIST_HEAD(&nes->entry_list); 1801 - return nes; 1802 - } 1803 - 1804 - static void release_nat_entry_set(struct nat_entry_set *nes, 1805 - struct f2fs_nm_info *nm_i) 1806 - { 1807 - f2fs_bug_on(!list_empty(&nes->entry_list)); 1808 - 1809 - nm_i->dirty_nat_cnt -= nes->entry_cnt; 1810 - list_del(&nes->set_list); 1811 - kmem_cache_free(nat_entry_set_slab, nes); 1812 - } 1813 - 1814 - static void adjust_nat_entry_set(struct nat_entry_set *nes, 1815 - struct list_head *head) 1816 - { 1817 - struct nat_entry_set *next = nes; 1818 - 1819 - if (list_is_last(&nes->set_list, head)) 1820 - return; 1821 - 1822 - list_for_each_entry_continue(next, head, set_list) 1823 - if (nes->entry_cnt <= next->entry_cnt) 1824 - break; 1825 - 1826 - list_move_tail(&nes->set_list, &next->set_list); 1827 - } 1828 - 1829 - static void add_nat_entry(struct nat_entry *ne, struct list_head *head) 1830 - { 1831 - struct nat_entry_set *nes; 1832 - nid_t start_nid = START_NID(ne->ni.nid); 1833 - 1834 - list_for_each_entry(nes, head, set_list) { 1835 - if (nes->start_nid == start_nid) { 1836 - list_move_tail(&ne->list, &nes->entry_list); 1837 - nes->entry_cnt++; 1838 - adjust_nat_entry_set(nes, head); 1839 - return; 1840 - } 1841 - } 1842 - 1843 - nes = grab_nat_entry_set(); 1844 - 1845 - nes->start_nid = start_nid; 1846 - list_move_tail(&ne->list, 
&nes->entry_list); 1847 - nes->entry_cnt++; 1848 - list_add(&nes->set_list, head); 1849 - } 1850 - 1851 - static void merge_nats_in_set(struct f2fs_sb_info *sbi) 1852 - { 1853 - struct f2fs_nm_info *nm_i = NM_I(sbi); 1854 - struct list_head *dirty_list = &nm_i->dirty_nat_entries; 1855 - struct list_head *set_list = &nm_i->nat_entry_set; 1856 - struct nat_entry *ne, *tmp; 1857 - 1858 - write_lock(&nm_i->nat_tree_lock); 1859 - list_for_each_entry_safe(ne, tmp, dirty_list, list) { 1860 - if (nat_get_blkaddr(ne) == NEW_ADDR) 1861 - continue; 1862 - add_nat_entry(ne, set_list); 1863 - nm_i->dirty_nat_cnt++; 1864 - } 1865 - write_unlock(&nm_i->nat_tree_lock); 1866 - } 1867 - 1868 - static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) 1869 - { 1870 - if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) 1871 - return true; 1872 - else 1873 - return false; 1874 - } 1875 - 1876 1736 static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1877 1737 { 1878 1738 struct f2fs_nm_info *nm_i = NM_I(sbi); ··· 1824 1850 mutex_unlock(&curseg->curseg_mutex); 1825 1851 } 1826 1852 1853 + static void __adjust_nat_entry_set(struct nat_entry_set *nes, 1854 + struct list_head *head, int max) 1855 + { 1856 + struct nat_entry_set *cur; 1857 + 1858 + if (nes->entry_cnt >= max) 1859 + goto add_out; 1860 + 1861 + list_for_each_entry(cur, head, set_list) { 1862 + if (cur->entry_cnt >= nes->entry_cnt) { 1863 + list_add(&nes->set_list, cur->set_list.prev); 1864 + return; 1865 + } 1866 + } 1867 + add_out: 1868 + list_add_tail(&nes->set_list, head); 1869 + } 1870 + 1871 + static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 1872 + struct nat_entry_set *set) 1873 + { 1874 + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1875 + struct f2fs_summary_block *sum = curseg->sum_blk; 1876 + nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; 1877 + bool to_journal = true; 1878 + struct f2fs_nat_block *nat_blk; 1879 + struct nat_entry *ne, *cur; 1880 + struct page 
*page = NULL; 1881 + 1882 + /* 1883 + * there are two steps to flush nat entries: 1884 + * #1, flush nat entries to journal in current hot data summary block. 1885 + * #2, flush nat entries to nat page. 1886 + */ 1887 + if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL)) 1888 + to_journal = false; 1889 + 1890 + if (to_journal) { 1891 + mutex_lock(&curseg->curseg_mutex); 1892 + } else { 1893 + page = get_next_nat_page(sbi, start_nid); 1894 + nat_blk = page_address(page); 1895 + f2fs_bug_on(sbi, !nat_blk); 1896 + } 1897 + 1898 + /* flush dirty nats in nat entry set */ 1899 + list_for_each_entry_safe(ne, cur, &set->entry_list, list) { 1900 + struct f2fs_nat_entry *raw_ne; 1901 + nid_t nid = nat_get_nid(ne); 1902 + int offset; 1903 + 1904 + if (nat_get_blkaddr(ne) == NEW_ADDR) 1905 + continue; 1906 + 1907 + if (to_journal) { 1908 + offset = lookup_journal_in_cursum(sum, 1909 + NAT_JOURNAL, nid, 1); 1910 + f2fs_bug_on(sbi, offset < 0); 1911 + raw_ne = &nat_in_journal(sum, offset); 1912 + nid_in_journal(sum, offset) = cpu_to_le32(nid); 1913 + } else { 1914 + raw_ne = &nat_blk->entries[nid - start_nid]; 1915 + } 1916 + raw_nat_from_node_info(raw_ne, &ne->ni); 1917 + 1918 + write_lock(&NM_I(sbi)->nat_tree_lock); 1919 + nat_reset_flag(ne); 1920 + __clear_nat_cache_dirty(NM_I(sbi), ne); 1921 + write_unlock(&NM_I(sbi)->nat_tree_lock); 1922 + 1923 + if (nat_get_blkaddr(ne) == NULL_ADDR) 1924 + add_free_nid(sbi, nid, false); 1925 + } 1926 + 1927 + if (to_journal) 1928 + mutex_unlock(&curseg->curseg_mutex); 1929 + else 1930 + f2fs_put_page(page, 1); 1931 + 1932 + if (!set->entry_cnt) { 1933 + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 1934 + kmem_cache_free(nat_entry_set_slab, set); 1935 + } 1936 + } 1937 + 1827 1938 /* 1828 1939 * This function is called during the checkpointing process. 
1829 1940 */ ··· 1917 1858 struct f2fs_nm_info *nm_i = NM_I(sbi); 1918 1859 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1919 1860 struct f2fs_summary_block *sum = curseg->sum_blk; 1920 - struct nat_entry_set *nes, *tmp; 1921 - struct list_head *head = &nm_i->nat_entry_set; 1922 - bool to_journal = true; 1923 - 1924 - /* merge nat entries of dirty list to nat entry set temporarily */ 1925 - merge_nats_in_set(sbi); 1861 + struct nat_entry_set *setvec[NATVEC_SIZE]; 1862 + struct nat_entry_set *set, *tmp; 1863 + unsigned int found; 1864 + nid_t set_idx = 0; 1865 + LIST_HEAD(sets); 1926 1866 1927 1867 /* 1928 1868 * if there are no enough space in journal to store dirty nat 1929 1869 * entries, remove all entries from journal and merge them 1930 1870 * into nat entry set. 1931 1871 */ 1932 - if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { 1872 + if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 1933 1873 remove_nats_in_journal(sbi); 1934 - 1935 - /* 1936 - * merge nat entries of dirty list to nat entry set temporarily 1937 - */ 1938 - merge_nats_in_set(sbi); 1939 - } 1940 1874 1941 1875 if (!nm_i->dirty_nat_cnt) 1942 1876 return; 1943 1877 1944 - /* 1945 - * there are two steps to flush nat entries: 1946 - * #1, flush nat entries to journal in current hot data summary block. 1947 - * #2, flush nat entries to nat page. 
1948 - */ 1949 - list_for_each_entry_safe(nes, tmp, head, set_list) { 1950 - struct f2fs_nat_block *nat_blk; 1951 - struct nat_entry *ne, *cur; 1952 - struct page *page; 1953 - nid_t start_nid = nes->start_nid; 1954 - 1955 - if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) 1956 - to_journal = false; 1957 - 1958 - if (to_journal) { 1959 - mutex_lock(&curseg->curseg_mutex); 1960 - } else { 1961 - page = get_next_nat_page(sbi, start_nid); 1962 - nat_blk = page_address(page); 1963 - f2fs_bug_on(!nat_blk); 1964 - } 1965 - 1966 - /* flush dirty nats in nat entry set */ 1967 - list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { 1968 - struct f2fs_nat_entry *raw_ne; 1969 - nid_t nid = nat_get_nid(ne); 1970 - int offset; 1971 - 1972 - if (to_journal) { 1973 - offset = lookup_journal_in_cursum(sum, 1974 - NAT_JOURNAL, nid, 1); 1975 - f2fs_bug_on(offset < 0); 1976 - raw_ne = &nat_in_journal(sum, offset); 1977 - nid_in_journal(sum, offset) = cpu_to_le32(nid); 1978 - } else { 1979 - raw_ne = &nat_blk->entries[nid - start_nid]; 1980 - } 1981 - raw_nat_from_node_info(raw_ne, &ne->ni); 1982 - 1983 - if (nat_get_blkaddr(ne) == NULL_ADDR && 1984 - add_free_nid(sbi, nid, false) <= 0) { 1985 - write_lock(&nm_i->nat_tree_lock); 1986 - __del_from_nat_cache(nm_i, ne); 1987 - write_unlock(&nm_i->nat_tree_lock); 1988 - } else { 1989 - write_lock(&nm_i->nat_tree_lock); 1990 - __clear_nat_cache_dirty(nm_i, ne); 1991 - write_unlock(&nm_i->nat_tree_lock); 1992 - } 1993 - } 1994 - 1995 - if (to_journal) 1996 - mutex_unlock(&curseg->curseg_mutex); 1997 - else 1998 - f2fs_put_page(page, 1); 1999 - 2000 - release_nat_entry_set(nes, nm_i); 1878 + while ((found = __gang_lookup_nat_set(nm_i, 1879 + set_idx, NATVEC_SIZE, setvec))) { 1880 + unsigned idx; 1881 + set_idx = setvec[found - 1]->set + 1; 1882 + for (idx = 0; idx < found; idx++) 1883 + __adjust_nat_entry_set(setvec[idx], &sets, 1884 + MAX_NAT_JENTRIES(sum)); 2001 1885 } 2002 1886 2003 - f2fs_bug_on(!list_empty(head)); 2004 
- f2fs_bug_on(nm_i->dirty_nat_cnt); 1887 + /* flush dirty nats in nat entry set */ 1888 + list_for_each_entry_safe(set, tmp, &sets, set_list) 1889 + __flush_nat_entry_set(sbi, set); 1890 + 1891 + f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2005 1892 } 2006 1893 2007 1894 static int init_node_manager(struct f2fs_sb_info *sbi) ··· 1974 1969 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 1975 1970 INIT_LIST_HEAD(&nm_i->free_nid_list); 1976 1971 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1972 + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); 1977 1973 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 - INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1979 - INIT_LIST_HEAD(&nm_i->nat_entry_set); 1980 1974 1981 1975 mutex_init(&nm_i->build_lock); 1982 1976 spin_lock_init(&nm_i->free_nid_list_lock); ··· 2024 2020 /* destroy free nid list */ 2025 2021 spin_lock(&nm_i->free_nid_list_lock); 2026 2022 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2027 - f2fs_bug_on(i->state == NID_ALLOC); 2023 + f2fs_bug_on(sbi, i->state == NID_ALLOC); 2028 2024 __del_from_free_nid_list(nm_i, i); 2029 2025 nm_i->fcnt--; 2030 2026 spin_unlock(&nm_i->free_nid_list_lock); 2031 2027 kmem_cache_free(free_nid_slab, i); 2032 2028 spin_lock(&nm_i->free_nid_list_lock); 2033 2029 } 2034 - f2fs_bug_on(nm_i->fcnt); 2030 + f2fs_bug_on(sbi, nm_i->fcnt); 2035 2031 spin_unlock(&nm_i->free_nid_list_lock); 2036 2032 2037 2033 /* destroy nat cache */ ··· 2043 2039 for (idx = 0; idx < found; idx++) 2044 2040 __del_from_nat_cache(nm_i, natvec[idx]); 2045 2041 } 2046 - f2fs_bug_on(nm_i->nat_cnt); 2042 + f2fs_bug_on(sbi, nm_i->nat_cnt); 2047 2043 write_unlock(&nm_i->nat_tree_lock); 2048 2044 2049 2045 kfree(nm_i->nat_bitmap);
+40 -20
fs/f2fs/node.h
··· 39 39 unsigned char version; /* version of the node */ 40 40 }; 41 41 42 + enum { 43 + IS_CHECKPOINTED, /* is it checkpointed before? */ 44 + HAS_FSYNCED_INODE, /* is the inode fsynced before? */ 45 + HAS_LAST_FSYNC, /* has the latest node fsync mark? */ 46 + IS_DIRTY, /* this nat entry is dirty? */ 47 + }; 48 + 42 49 struct nat_entry { 43 50 struct list_head list; /* for clean or dirty nat list */ 44 - bool checkpointed; /* whether it is checkpointed or not */ 45 - bool fsync_done; /* whether the latest node has fsync mark */ 51 + unsigned char flag; /* for node information bits */ 46 52 struct node_info ni; /* in-memory node information */ 47 53 }; 48 54 ··· 61 55 #define nat_get_version(nat) (nat->ni.version) 62 56 #define nat_set_version(nat, v) (nat->ni.version = v) 63 57 64 - #define __set_nat_cache_dirty(nm_i, ne) \ 65 - do { \ 66 - ne->checkpointed = false; \ 67 - list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ 68 - } while (0) 69 - #define __clear_nat_cache_dirty(nm_i, ne) \ 70 - do { \ 71 - ne->checkpointed = true; \ 72 - list_move_tail(&ne->list, &nm_i->nat_entries); \ 73 - } while (0) 74 58 #define inc_node_version(version) (++version) 59 + 60 + static inline void set_nat_flag(struct nat_entry *ne, 61 + unsigned int type, bool set) 62 + { 63 + unsigned char mask = 0x01 << type; 64 + if (set) 65 + ne->flag |= mask; 66 + else 67 + ne->flag &= ~mask; 68 + } 69 + 70 + static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) 71 + { 72 + unsigned char mask = 0x01 << type; 73 + return ne->flag & mask; 74 + } 75 + 76 + static inline void nat_reset_flag(struct nat_entry *ne) 77 + { 78 + /* these states can be set only after checkpoint was done */ 79 + set_nat_flag(ne, IS_CHECKPOINTED, true); 80 + set_nat_flag(ne, HAS_FSYNCED_INODE, false); 81 + set_nat_flag(ne, HAS_LAST_FSYNC, true); 82 + } 75 83 76 84 static inline void node_info_from_raw_nat(struct node_info *ni, 77 85 struct f2fs_nat_entry *raw_ne) ··· 110 90 }; 111 91 112 92 struct 
nat_entry_set { 113 - struct list_head set_list; /* link with all nat sets */ 93 + struct list_head set_list; /* link with other nat sets */ 114 94 struct list_head entry_list; /* link with dirty nat entries */ 115 - nid_t start_nid; /* start nid of nats in set */ 95 + nid_t set; /* set number*/ 116 96 unsigned int entry_cnt; /* the # of nat entries in set */ 117 97 }; 118 98 ··· 130 110 int state; /* in use or not: NID_NEW or NID_ALLOC */ 131 111 }; 132 112 133 - static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) 113 + static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) 134 114 { 135 115 struct f2fs_nm_info *nm_i = NM_I(sbi); 136 116 struct free_nid *fnid; 137 117 138 - if (nm_i->fcnt <= 0) 139 - return -1; 140 118 spin_lock(&nm_i->free_nid_list_lock); 119 + if (nm_i->fcnt <= 0) { 120 + spin_unlock(&nm_i->free_nid_list_lock); 121 + return; 122 + } 141 123 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); 142 124 *nid = fnid->nid; 143 125 spin_unlock(&nm_i->free_nid_list_lock); 144 - return 0; 145 126 } 146 127 147 128 /* ··· 218 197 219 198 static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) 220 199 { 221 - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 222 - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 200 + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); 223 201 struct f2fs_node *rn = F2FS_NODE(page); 224 202 225 203 rn->footer.cp_ver = ckpt->checkpoint_ver;
+122 -73
fs/f2fs/recovery.c
··· 14 14 #include "node.h" 15 15 #include "segment.h" 16 16 17 + /* 18 + * Roll forward recovery scenarios. 19 + * 20 + * [Term] F: fsync_mark, D: dentry_mark 21 + * 22 + * 1. inode(x) | CP | inode(x) | dnode(F) 23 + * -> Update the latest inode(x). 24 + * 25 + * 2. inode(x) | CP | inode(F) | dnode(F) 26 + * -> No problem. 27 + * 28 + * 3. inode(x) | CP | dnode(F) | inode(x) 29 + * -> Recover to the latest dnode(F), and drop the last inode(x) 30 + * 31 + * 4. inode(x) | CP | dnode(F) | inode(F) 32 + * -> No problem. 33 + * 34 + * 5. CP | inode(x) | dnode(F) 35 + * -> The inode(DF) was missing. Should drop this dnode(F). 36 + * 37 + * 6. CP | inode(DF) | dnode(F) 38 + * -> No problem. 39 + * 40 + * 7. CP | dnode(F) | inode(DF) 41 + * -> If f2fs_iget fails, then goto next to find inode(DF). 42 + * 43 + * 8. CP | dnode(F) | inode(x) 44 + * -> If f2fs_iget fails, then goto next to find inode(DF). 45 + * But it will fail due to no inode(DF). 46 + */ 47 + 17 48 static struct kmem_cache *fsync_entry_slab; 18 49 19 50 bool space_for_roll_forward(struct f2fs_sb_info *sbi) ··· 67 36 return NULL; 68 37 } 69 38 70 - static int recover_dentry(struct page *ipage, struct inode *inode) 39 + static int recover_dentry(struct inode *inode, struct page *ipage) 71 40 { 72 41 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 73 42 nid_t pino = le32_to_cpu(raw_inode->i_pino); ··· 106 75 err = -EEXIST; 107 76 goto out_unmap_put; 108 77 } 109 - err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); 78 + err = acquire_orphan_inode(F2FS_I_SB(inode)); 110 79 if (err) { 111 80 iput(einode); 112 81 goto out_unmap_put; ··· 141 110 return err; 142 111 } 143 112 144 - static int recover_inode(struct inode *inode, struct page *node_page) 113 + static void recover_inode(struct inode *inode, struct page *page) 145 114 { 146 - struct f2fs_inode *raw_inode = F2FS_INODE(node_page); 115 + struct f2fs_inode *raw = F2FS_INODE(page); 147 116 148 - if (!IS_INODE(node_page)) 149 - return 0; 150 - 151 - 
inode->i_mode = le16_to_cpu(raw_inode->i_mode); 152 - i_size_write(inode, le64_to_cpu(raw_inode->i_size)); 153 - inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 154 - inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 155 - inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 156 - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 157 - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); 158 - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 159 - 160 - if (is_dent_dnode(node_page)) 161 - return recover_dentry(node_page, inode); 117 + inode->i_mode = le16_to_cpu(raw->i_mode); 118 + i_size_write(inode, le64_to_cpu(raw->i_size)); 119 + inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); 120 + inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); 121 + inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); 122 + inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 123 + inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); 124 + inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 162 125 163 126 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", 164 - ino_of_node(node_page), raw_inode->i_name); 165 - return 0; 127 + ino_of_node(page), F2FS_INODE(page)->i_name); 166 128 } 167 129 168 130 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 169 131 { 170 132 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 171 133 struct curseg_info *curseg; 172 - struct page *page; 134 + struct page *page = NULL; 173 135 block_t blkaddr; 174 136 int err = 0; 175 137 ··· 170 146 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 171 147 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 172 148 173 - /* read node page */ 174 - page = alloc_page(GFP_F2FS_ZERO); 175 - if (!page) 176 - return -ENOMEM; 177 - lock_page(page); 178 - 179 149 while (1) { 180 150 struct fsync_inode_entry *entry; 181 151 182 - err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); 183 - if (err) 184 - return 
err; 152 + if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) 153 + return 0; 185 154 186 - lock_page(page); 155 + page = get_meta_page_ra(sbi, blkaddr); 187 156 188 157 if (cp_ver != cpver_of_node(page)) 189 158 break; ··· 197 180 } 198 181 199 182 /* add this fsync inode to the list */ 200 - entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); 183 + entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 201 184 if (!entry) { 202 185 err = -ENOMEM; 203 186 break; 204 187 } 205 - 188 + /* 189 + * CP | dnode(F) | inode(DF) 190 + * For this case, we should not give up now. 191 + */ 206 192 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 207 193 if (IS_ERR(entry->inode)) { 208 194 err = PTR_ERR(entry->inode); 209 195 kmem_cache_free(fsync_entry_slab, entry); 196 + if (err == -ENOENT) 197 + goto next; 210 198 break; 211 199 } 212 200 list_add_tail(&entry->list, head); 213 201 } 214 202 entry->blkaddr = blkaddr; 215 203 216 - err = recover_inode(entry->inode, page); 217 - if (err && err != -ENOENT) 218 - break; 204 + if (IS_INODE(page)) { 205 + entry->last_inode = blkaddr; 206 + if (is_dent_dnode(page)) 207 + entry->last_dentry = blkaddr; 208 + } 219 209 next: 220 210 /* check next segment */ 221 211 blkaddr = next_blkaddr_of_node(page); 212 + f2fs_put_page(page, 1); 222 213 } 223 - 224 - unlock_page(page); 225 - __free_pages(page, 0); 226 - 214 + f2fs_put_page(page, 1); 227 215 return err; 228 216 } 229 217 ··· 301 279 ino = ino_of_node(node_page); 302 280 f2fs_put_page(node_page, 1); 303 281 304 - /* Deallocate previous index in the node page */ 305 - inode = f2fs_iget(sbi->sb, ino); 306 - if (IS_ERR(inode)) 307 - return PTR_ERR(inode); 282 + if (ino != dn->inode->i_ino) { 283 + /* Deallocate previous index in the node page */ 284 + inode = f2fs_iget(sbi->sb, ino); 285 + if (IS_ERR(inode)) 286 + return PTR_ERR(inode); 287 + } else { 288 + inode = dn->inode; 289 + } 308 290 309 291 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 310 - 
le16_to_cpu(sum.ofs_in_node); 292 + le16_to_cpu(sum.ofs_in_node); 311 293 312 - truncate_hole(inode, bidx, bidx + 1); 313 - iput(inode); 294 + if (ino != dn->inode->i_ino) { 295 + truncate_hole(inode, bidx, bidx + 1); 296 + iput(inode); 297 + } else { 298 + struct dnode_of_data tdn; 299 + set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); 300 + if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) 301 + return 0; 302 + if (tdn.data_blkaddr != NULL_ADDR) 303 + truncate_data_blocks_range(&tdn, 1); 304 + f2fs_put_page(tdn.node_page, 1); 305 + } 314 306 return 0; 315 307 } 316 308 ··· 367 331 f2fs_wait_on_page_writeback(dn.node_page, NODE); 368 332 369 333 get_node_info(sbi, dn.nid, &ni); 370 - f2fs_bug_on(ni.ino != ino_of_node(page)); 371 - f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); 334 + f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); 335 + f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); 372 336 373 337 for (; start < end; start++) { 374 338 block_t src, dest; ··· 380 344 if (src == NULL_ADDR) { 381 345 err = reserve_new_block(&dn); 382 346 /* We should not get -ENOSPC */ 383 - f2fs_bug_on(err); 347 + f2fs_bug_on(sbi, err); 384 348 } 385 349 386 350 /* Check the previous node page having this index */ ··· 422 386 { 423 387 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 424 388 struct curseg_info *curseg; 425 - struct page *page; 389 + struct page *page = NULL; 426 390 int err = 0; 427 391 block_t blkaddr; 428 392 ··· 430 394 curseg = CURSEG_I(sbi, type); 431 395 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 432 396 433 - /* read node page */ 434 - page = alloc_page(GFP_F2FS_ZERO); 435 - if (!page) 436 - return -ENOMEM; 437 - 438 - lock_page(page); 439 - 440 397 while (1) { 441 398 struct fsync_inode_entry *entry; 442 399 443 - err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); 444 - if (err) 445 - return err; 446 - 447 - lock_page(page); 448 - 449 - if (cp_ver != cpver_of_node(page)) 400 + if (blkaddr < 
MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) 450 401 break; 402 + 403 + page = get_meta_page_ra(sbi, blkaddr); 404 + 405 + if (cp_ver != cpver_of_node(page)) { 406 + f2fs_put_page(page, 1); 407 + break; 408 + } 451 409 452 410 entry = get_fsync_inode(head, ino_of_node(page)); 453 411 if (!entry) 454 412 goto next; 455 - 413 + /* 414 + * inode(x) | CP | inode(x) | dnode(F) 415 + * In this case, we can lose the latest inode(x). 416 + * So, call recover_inode for the inode update. 417 + */ 418 + if (entry->last_inode == blkaddr) 419 + recover_inode(entry->inode, page); 420 + if (entry->last_dentry == blkaddr) { 421 + err = recover_dentry(entry->inode, page); 422 + if (err) { 423 + f2fs_put_page(page, 1); 424 + break; 425 + } 426 + } 456 427 err = do_recover_data(sbi, entry->inode, page, blkaddr); 457 - if (err) 428 + if (err) { 429 + f2fs_put_page(page, 1); 458 430 break; 431 + } 459 432 460 433 if (entry->blkaddr == blkaddr) { 461 434 iput(entry->inode); ··· 474 429 next: 475 430 /* check next segment */ 476 431 blkaddr = next_blkaddr_of_node(page); 432 + f2fs_put_page(page, 1); 477 433 } 478 - 479 - unlock_page(page); 480 - __free_pages(page, 0); 481 - 482 434 if (!err) 483 435 allocate_new_segments(sbi); 484 436 return err; ··· 516 474 /* step #2: recover data */ 517 475 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 518 476 if (!err) 519 - f2fs_bug_on(!list_empty(&inode_list)); 477 + f2fs_bug_on(sbi, !list_empty(&inode_list)); 520 478 out: 521 479 destroy_fsync_dnodes(&inode_list); 522 480 kmem_cache_destroy(fsync_entry_slab); 481 + 482 + /* truncate meta pages to be used by the recovery */ 483 + truncate_inode_pages_range(META_MAPPING(sbi), 484 + MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); 523 485 524 486 if (err) { 525 487 truncate_inode_pages_final(NODE_MAPPING(sbi)); ··· 540 494 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 541 495 mutex_unlock(&sbi->cp_mutex); 542 496 } else if (need_writecp) { 497 + struct cp_control cpc = { 498 + .reason = 
CP_SYNC, 499 + }; 543 500 mutex_unlock(&sbi->cp_mutex); 544 - write_checkpoint(sbi, false); 501 + write_checkpoint(sbi, &cpc); 545 502 } else { 546 503 mutex_unlock(&sbi->cp_mutex); 547 504 }
+382 -142
fs/f2fs/segment.c
··· 25 25 #define __reverse_ffz(x) __reverse_ffs(~(x)) 26 26 27 27 static struct kmem_cache *discard_entry_slab; 28 + static struct kmem_cache *sit_entry_set_slab; 29 + static struct kmem_cache *inmem_entry_slab; 28 30 29 31 /* 30 32 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since ··· 174 172 return result + __reverse_ffz(tmp); 175 173 } 176 174 175 + void register_inmem_page(struct inode *inode, struct page *page) 176 + { 177 + struct f2fs_inode_info *fi = F2FS_I(inode); 178 + struct inmem_pages *new; 179 + 180 + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); 181 + 182 + /* add atomic page indices to the list */ 183 + new->page = page; 184 + INIT_LIST_HEAD(&new->list); 185 + 186 + /* increase reference count with clean state */ 187 + mutex_lock(&fi->inmem_lock); 188 + get_page(page); 189 + list_add_tail(&new->list, &fi->inmem_pages); 190 + mutex_unlock(&fi->inmem_lock); 191 + } 192 + 193 + void commit_inmem_pages(struct inode *inode, bool abort) 194 + { 195 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 196 + struct f2fs_inode_info *fi = F2FS_I(inode); 197 + struct inmem_pages *cur, *tmp; 198 + bool submit_bio = false; 199 + struct f2fs_io_info fio = { 200 + .type = DATA, 201 + .rw = WRITE_SYNC, 202 + }; 203 + 204 + f2fs_balance_fs(sbi); 205 + f2fs_lock_op(sbi); 206 + 207 + mutex_lock(&fi->inmem_lock); 208 + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 209 + lock_page(cur->page); 210 + if (!abort && cur->page->mapping == inode->i_mapping) { 211 + f2fs_wait_on_page_writeback(cur->page, DATA); 212 + if (clear_page_dirty_for_io(cur->page)) 213 + inode_dec_dirty_pages(inode); 214 + do_write_data_page(cur->page, &fio); 215 + submit_bio = true; 216 + } 217 + f2fs_put_page(cur->page, 1); 218 + list_del(&cur->list); 219 + kmem_cache_free(inmem_entry_slab, cur); 220 + } 221 + if (submit_bio) 222 + f2fs_submit_merged_bio(sbi, DATA, WRITE); 223 + mutex_unlock(&fi->inmem_lock); 224 + 225 + 
filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); 226 + f2fs_unlock_op(sbi); 227 + } 228 + 177 229 /* 178 230 * This function balances dirty node and dentry pages. 179 231 * In addition, it controls garbage collection. ··· 261 205 if (kthread_should_stop()) 262 206 return 0; 263 207 264 - spin_lock(&fcc->issue_lock); 265 - if (fcc->issue_list) { 266 - fcc->dispatch_list = fcc->issue_list; 267 - fcc->issue_list = fcc->issue_tail = NULL; 268 - } 269 - spin_unlock(&fcc->issue_lock); 270 - 271 - if (fcc->dispatch_list) { 208 + if (!llist_empty(&fcc->issue_list)) { 272 209 struct bio *bio = bio_alloc(GFP_NOIO, 0); 273 210 struct flush_cmd *cmd, *next; 274 211 int ret; 275 212 213 + fcc->dispatch_list = llist_del_all(&fcc->issue_list); 214 + fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); 215 + 276 216 bio->bi_bdev = sbi->sb->s_bdev; 277 217 ret = submit_bio_wait(WRITE_FLUSH, bio); 278 218 279 - for (cmd = fcc->dispatch_list; cmd; cmd = next) { 219 + llist_for_each_entry_safe(cmd, next, 220 + fcc->dispatch_list, llnode) { 280 221 cmd->ret = ret; 281 - next = cmd->next; 282 222 complete(&cmd->wait); 283 223 } 284 224 bio_put(bio); ··· 282 230 } 283 231 284 232 wait_event_interruptible(*q, 285 - kthread_should_stop() || fcc->issue_list); 233 + kthread_should_stop() || !llist_empty(&fcc->issue_list)); 286 234 goto repeat; 287 235 } 288 236 ··· 301 249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 302 250 303 251 init_completion(&cmd.wait); 304 - cmd.next = NULL; 305 252 306 - spin_lock(&fcc->issue_lock); 307 - if (fcc->issue_list) 308 - fcc->issue_tail->next = &cmd; 309 - else 310 - fcc->issue_list = &cmd; 311 - fcc->issue_tail = &cmd; 312 - spin_unlock(&fcc->issue_lock); 253 + llist_add(&cmd.llnode, &fcc->issue_list); 313 254 314 255 if (!fcc->dispatch_list) 315 256 wake_up(&fcc->flush_wait_queue); ··· 321 276 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); 322 277 if (!fcc) 323 278 return -ENOMEM; 324 - 
spin_lock_init(&fcc->issue_lock); 325 279 init_waitqueue_head(&fcc->flush_wait_queue); 280 + init_llist_head(&fcc->issue_list); 326 281 SM_I(sbi)->cmd_control_info = fcc; 327 282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 328 283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); ··· 362 317 struct seg_entry *sentry = get_seg_entry(sbi, segno); 363 318 enum dirty_type t = sentry->type; 364 319 320 + if (unlikely(t >= DIRTY)) { 321 + f2fs_bug_on(sbi, 1); 322 + return; 323 + } 365 324 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) 366 325 dirty_i->nr_dirty[t]++; 367 326 } ··· 425 376 static int f2fs_issue_discard(struct f2fs_sb_info *sbi, 426 377 block_t blkstart, block_t blklen) 427 378 { 428 - sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); 429 - sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); 379 + sector_t start = SECTOR_FROM_BLOCK(blkstart); 380 + sector_t len = SECTOR_FROM_BLOCK(blklen); 430 381 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 431 382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 432 383 } ··· 441 392 } 442 393 } 443 394 444 - static void add_discard_addrs(struct f2fs_sb_info *sbi, 445 - unsigned int segno, struct seg_entry *se) 395 + static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) 446 396 { 447 397 struct list_head *head = &SM_I(sbi)->discard_list; 448 398 struct discard_entry *new; 449 399 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 450 400 int max_blocks = sbi->blocks_per_seg; 401 + struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); 451 402 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 452 403 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 453 404 unsigned long dmap[entries]; 454 405 unsigned int start = 0, end = -1; 406 + bool force = (cpc->reason == CP_DISCARD); 455 407 int i; 456 408 457 - if (!test_opt(sbi, DISCARD)) 409 + if (!force && !test_opt(sbi, DISCARD)) 458 410 return; 411 + 412 + if (force && 
!se->valid_blocks) { 413 + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 414 + /* 415 + * if this segment is registered in the prefree list, then 416 + * we should skip adding a discard candidate, and let the 417 + * checkpoint do that later. 418 + */ 419 + mutex_lock(&dirty_i->seglist_lock); 420 + if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { 421 + mutex_unlock(&dirty_i->seglist_lock); 422 + cpc->trimmed += sbi->blocks_per_seg; 423 + return; 424 + } 425 + mutex_unlock(&dirty_i->seglist_lock); 426 + 427 + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); 428 + INIT_LIST_HEAD(&new->list); 429 + new->blkaddr = START_BLOCK(sbi, cpc->trim_start); 430 + new->len = sbi->blocks_per_seg; 431 + list_add_tail(&new->list, head); 432 + SM_I(sbi)->nr_discards += sbi->blocks_per_seg; 433 + cpc->trimmed += sbi->blocks_per_seg; 434 + return; 435 + } 459 436 460 437 /* zero block will be discarded through the prefree list */ 461 438 if (!se->valid_blocks || se->valid_blocks == max_blocks) ··· 491 416 for (i = 0; i < entries; i++) 492 417 dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 493 418 494 - while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 419 + while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 495 420 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 496 421 if (start >= max_blocks) 497 422 break; 498 423 499 424 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 500 425 426 + if (end - start < cpc->trim_minlen) 427 + continue; 428 + 501 429 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); 502 430 INIT_LIST_HEAD(&new->list); 503 - new->blkaddr = START_BLOCK(sbi, segno) + start; 431 + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; 504 432 new->len = end - start; 433 + cpc->trimmed += end - start; 505 434 506 435 list_add_tail(&new->list, head); 507 436 SM_I(sbi)->nr_discards += end - start; 437 + } 438 + } 439 + 440 + void release_discard_addrs(struct f2fs_sb_info *sbi) 441 + 
{ 442 + struct list_head *head = &(SM_I(sbi)->discard_list); 443 + struct discard_entry *entry, *this; 444 + 445 + /* drop caches */ 446 + list_for_each_entry_safe(entry, this, head, list) { 447 + list_del(&entry->list); 448 + kmem_cache_free(discard_entry_slab, entry); 508 449 } 509 450 } 510 451 ··· 531 440 { 532 441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 533 442 unsigned int segno; 534 - unsigned int total_segs = TOTAL_SEGS(sbi); 535 443 536 444 mutex_lock(&dirty_i->seglist_lock); 537 - for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) 445 + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) 538 446 __set_test_and_free(sbi, segno); 539 447 mutex_unlock(&dirty_i->seglist_lock); 540 448 } ··· 544 454 struct discard_entry *entry, *this; 545 455 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 546 456 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 547 - unsigned int total_segs = TOTAL_SEGS(sbi); 548 457 unsigned int start = 0, end = -1; 549 458 550 459 mutex_lock(&dirty_i->seglist_lock); 551 460 552 461 while (1) { 553 462 int i; 554 - start = find_next_bit(prefree_map, total_segs, end + 1); 555 - if (start >= total_segs) 463 + start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); 464 + if (start >= MAIN_SEGS(sbi)) 556 465 break; 557 - end = find_next_zero_bit(prefree_map, total_segs, start + 1); 466 + end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), 467 + start + 1); 558 468 559 469 for (i = start; i < end; i++) 560 470 clear_bit(i, prefree_map); ··· 578 488 } 579 489 } 580 490 581 - static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 491 + static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 582 492 { 583 493 struct sit_info *sit_i = SIT_I(sbi); 584 - if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) 494 + 495 + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { 585 496 sit_i->dirty_sentries++; 497 + return false; 498 
+ } 499 + 500 + return true; 586 501 } 587 502 588 503 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, ··· 611 516 new_vblocks = se->valid_blocks + del; 612 517 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 613 518 614 - f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 519 + f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || 615 520 (new_vblocks > sbi->blocks_per_seg))); 616 521 617 522 se->valid_blocks = new_vblocks; ··· 621 526 /* Update valid block bitmap */ 622 527 if (del > 0) { 623 528 if (f2fs_set_bit(offset, se->cur_valid_map)) 624 - BUG(); 529 + f2fs_bug_on(sbi, 1); 625 530 } else { 626 531 if (!f2fs_clear_bit(offset, se->cur_valid_map)) 627 - BUG(); 532 + f2fs_bug_on(sbi, 1); 628 533 } 629 534 if (!f2fs_test_bit(offset, se->ckpt_valid_map)) 630 535 se->ckpt_valid_blocks += del; ··· 653 558 unsigned int segno = GET_SEGNO(sbi, addr); 654 559 struct sit_info *sit_i = SIT_I(sbi); 655 560 656 - f2fs_bug_on(addr == NULL_ADDR); 561 + f2fs_bug_on(sbi, addr == NULL_ADDR); 657 562 if (addr == NEW_ADDR) 658 563 return; 659 564 ··· 729 634 unsigned int segno = curseg->segno + 1; 730 635 struct free_segmap_info *free_i = FREE_I(sbi); 731 636 732 - if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) 637 + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) 733 638 return !test_bit(segno, free_i->free_segmap); 734 639 return 0; 735 640 } ··· 743 648 { 744 649 struct free_segmap_info *free_i = FREE_I(sbi); 745 650 unsigned int segno, secno, zoneno; 746 - unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; 651 + unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; 747 652 unsigned int hint = *newseg / sbi->segs_per_sec; 748 653 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); 749 654 unsigned int left_start = hint; ··· 755 660 756 661 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 757 662 segno = find_next_zero_bit(free_i->free_segmap, 758 - TOTAL_SEGS(sbi), *newseg + 
1); 663 + MAIN_SEGS(sbi), *newseg + 1); 759 664 if (segno - *newseg < sbi->segs_per_sec - 760 665 (*newseg % sbi->segs_per_sec)) 761 666 goto got_it; 762 667 } 763 668 find_other_zone: 764 - secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); 765 - if (secno >= TOTAL_SECS(sbi)) { 669 + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); 670 + if (secno >= MAIN_SECS(sbi)) { 766 671 if (dir == ALLOC_RIGHT) { 767 672 secno = find_next_zero_bit(free_i->free_secmap, 768 - TOTAL_SECS(sbi), 0); 769 - f2fs_bug_on(secno >= TOTAL_SECS(sbi)); 673 + MAIN_SECS(sbi), 0); 674 + f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); 770 675 } else { 771 676 go_left = 1; 772 677 left_start = hint - 1; ··· 781 686 continue; 782 687 } 783 688 left_start = find_next_zero_bit(free_i->free_secmap, 784 - TOTAL_SECS(sbi), 0); 785 - f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); 689 + MAIN_SECS(sbi), 0); 690 + f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); 786 691 break; 787 692 } 788 693 secno = left_start; ··· 821 726 } 822 727 got_it: 823 728 /* set it as dirty segment in free segmap */ 824 - f2fs_bug_on(test_bit(segno, free_i->free_segmap)); 729 + f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); 825 730 __set_inuse(sbi, segno); 826 731 *newseg = segno; 827 732 write_unlock(&free_i->segmap_lock); ··· 993 898 .allocate_segment = allocate_segment_by_default, 994 899 }; 995 900 901 + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 902 + { 903 + __u64 start = range->start >> sbi->log_blocksize; 904 + __u64 end = start + (range->len >> sbi->log_blocksize) - 1; 905 + unsigned int start_segno, end_segno; 906 + struct cp_control cpc; 907 + 908 + if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || 909 + range->len < sbi->blocksize) 910 + return -EINVAL; 911 + 912 + if (end <= MAIN_BLKADDR(sbi)) 913 + goto out; 914 + 915 + /* start/end segment number in main_area */ 916 + start_segno = (start <= MAIN_BLKADDR(sbi)) ? 
0 : GET_SEGNO(sbi, start); 917 + end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : 918 + GET_SEGNO(sbi, end); 919 + cpc.reason = CP_DISCARD; 920 + cpc.trim_start = start_segno; 921 + cpc.trim_end = end_segno; 922 + cpc.trim_minlen = range->minlen >> sbi->log_blocksize; 923 + cpc.trimmed = 0; 924 + 925 + /* do checkpoint to issue discard commands safely */ 926 + write_checkpoint(sbi, &cpc); 927 + out: 928 + range->len = cpc.trimmed << sbi->log_blocksize; 929 + return 0; 930 + } 931 + 996 932 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) 997 933 { 998 934 struct curseg_info *curseg = CURSEG_I(sbi, type); ··· 1079 953 1080 954 static int __get_segment_type(struct page *page, enum page_type p_type) 1081 955 { 1082 - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1083 - switch (sbi->active_logs) { 956 + switch (F2FS_P_SB(page)->active_logs) { 1084 957 case 2: 1085 958 return __get_segment_type_2(page, p_type); 1086 959 case 4: 1087 960 return __get_segment_type_4(page, p_type); 1088 961 } 1089 962 /* NR_CURSEG_TYPE(6) logs by default */ 1090 - f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); 963 + f2fs_bug_on(F2FS_P_SB(page), 964 + F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); 1091 965 return __get_segment_type_6(page, p_type); 1092 966 } 1093 967 ··· 1167 1041 void write_data_page(struct page *page, struct dnode_of_data *dn, 1168 1042 block_t *new_blkaddr, struct f2fs_io_info *fio) 1169 1043 { 1170 - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 1044 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1171 1045 struct f2fs_summary sum; 1172 1046 struct node_info ni; 1173 1047 1174 - f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); 1048 + f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); 1175 1049 get_node_info(sbi, dn->nid, &ni); 1176 1050 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1177 1051 ··· 1181 1055 void rewrite_data_page(struct page *page, block_t old_blkaddr, 1182 1056 struct f2fs_io_info *fio) 
1183 1057 { 1184 - struct inode *inode = page->mapping->host; 1185 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1186 - f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio); 1058 + f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); 1187 1059 } 1188 1060 1189 1061 void recover_data_page(struct f2fs_sb_info *sbi, ··· 1254 1130 void f2fs_wait_on_page_writeback(struct page *page, 1255 1131 enum page_type type) 1256 1132 { 1257 - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1258 1133 if (PageWriteback(page)) { 1134 + struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1135 + 1259 1136 if (is_merged_page(sbi, page, type)) 1260 1137 f2fs_submit_merged_bio(sbi, type, WRITE); 1261 1138 wait_on_page_writeback(page); ··· 1525 1400 unsigned int segno) 1526 1401 { 1527 1402 struct sit_info *sit_i = SIT_I(sbi); 1528 - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); 1403 + unsigned int offset = SIT_BLOCK_OFFSET(segno); 1529 1404 block_t blk_addr = sit_i->sit_base_addr + offset; 1530 1405 1531 1406 check_seg_range(sbi, segno); ··· 1551 1426 /* get current sit block page without lock */ 1552 1427 src_page = get_meta_page(sbi, src_off); 1553 1428 dst_page = grab_meta_page(sbi, dst_off); 1554 - f2fs_bug_on(PageDirty(src_page)); 1429 + f2fs_bug_on(sbi, PageDirty(src_page)); 1555 1430 1556 1431 src_addr = page_address(src_page); 1557 1432 dst_addr = page_address(dst_page); ··· 1565 1440 return dst_page; 1566 1441 } 1567 1442 1568 - static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) 1443 + static struct sit_entry_set *grab_sit_entry_set(void) 1444 + { 1445 + struct sit_entry_set *ses = 1446 + f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); 1447 + 1448 + ses->entry_cnt = 0; 1449 + INIT_LIST_HEAD(&ses->set_list); 1450 + return ses; 1451 + } 1452 + 1453 + static void release_sit_entry_set(struct sit_entry_set *ses) 1454 + { 1455 + list_del(&ses->set_list); 1456 + kmem_cache_free(sit_entry_set_slab, ses); 1457 + } 1458 + 1459 + static void 
adjust_sit_entry_set(struct sit_entry_set *ses, 1460 + struct list_head *head) 1461 + { 1462 + struct sit_entry_set *next = ses; 1463 + 1464 + if (list_is_last(&ses->set_list, head)) 1465 + return; 1466 + 1467 + list_for_each_entry_continue(next, head, set_list) 1468 + if (ses->entry_cnt <= next->entry_cnt) 1469 + break; 1470 + 1471 + list_move_tail(&ses->set_list, &next->set_list); 1472 + } 1473 + 1474 + static void add_sit_entry(unsigned int segno, struct list_head *head) 1475 + { 1476 + struct sit_entry_set *ses; 1477 + unsigned int start_segno = START_SEGNO(segno); 1478 + 1479 + list_for_each_entry(ses, head, set_list) { 1480 + if (ses->start_segno == start_segno) { 1481 + ses->entry_cnt++; 1482 + adjust_sit_entry_set(ses, head); 1483 + return; 1484 + } 1485 + } 1486 + 1487 + ses = grab_sit_entry_set(); 1488 + 1489 + ses->start_segno = start_segno; 1490 + ses->entry_cnt++; 1491 + list_add(&ses->set_list, head); 1492 + } 1493 + 1494 + static void add_sits_in_set(struct f2fs_sb_info *sbi) 1495 + { 1496 + struct f2fs_sm_info *sm_info = SM_I(sbi); 1497 + struct list_head *set_list = &sm_info->sit_entry_set; 1498 + unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; 1499 + unsigned int segno; 1500 + 1501 + for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) 1502 + add_sit_entry(segno, set_list); 1503 + } 1504 + 1505 + static void remove_sits_in_journal(struct f2fs_sb_info *sbi) 1569 1506 { 1570 1507 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1571 1508 struct f2fs_summary_block *sum = curseg->sum_blk; 1572 1509 int i; 1573 1510 1574 - /* 1575 - * If the journal area in the current summary is full of sit entries, 1576 - * all the sit entries will be flushed. Otherwise the sit entries 1577 - * are not able to replace with newly hot sit entries. 
1578 - */ 1579 - if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { 1580 - for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { 1581 - unsigned int segno; 1582 - segno = le32_to_cpu(segno_in_journal(sum, i)); 1583 - __mark_sit_entry_dirty(sbi, segno); 1584 - } 1585 - update_sits_in_cursum(sum, -sits_in_cursum(sum)); 1586 - return true; 1511 + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { 1512 + unsigned int segno; 1513 + bool dirtied; 1514 + 1515 + segno = le32_to_cpu(segno_in_journal(sum, i)); 1516 + dirtied = __mark_sit_entry_dirty(sbi, segno); 1517 + 1518 + if (!dirtied) 1519 + add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); 1587 1520 } 1588 - return false; 1521 + update_sits_in_cursum(sum, -sits_in_cursum(sum)); 1589 1522 } 1590 1523 1591 1524 /* 1592 1525 * CP calls this function, which flushes SIT entries including sit_journal, 1593 1526 * and moves prefree segs to free segs. 1594 1527 */ 1595 - void flush_sit_entries(struct f2fs_sb_info *sbi) 1528 + void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) 1596 1529 { 1597 1530 struct sit_info *sit_i = SIT_I(sbi); 1598 1531 unsigned long *bitmap = sit_i->dirty_sentries_bitmap; 1599 1532 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1600 1533 struct f2fs_summary_block *sum = curseg->sum_blk; 1601 - unsigned long nsegs = TOTAL_SEGS(sbi); 1602 - struct page *page = NULL; 1603 - struct f2fs_sit_block *raw_sit = NULL; 1604 - unsigned int start = 0, end = 0; 1605 - unsigned int segno; 1606 - bool flushed; 1534 + struct sit_entry_set *ses, *tmp; 1535 + struct list_head *head = &SM_I(sbi)->sit_entry_set; 1536 + bool to_journal = true; 1537 + struct seg_entry *se; 1607 1538 1608 1539 mutex_lock(&curseg->curseg_mutex); 1609 1540 mutex_lock(&sit_i->sentry_lock); 1610 1541 1611 1542 /* 1612 - * "flushed" indicates whether sit entries in journal are flushed 1613 - * to the SIT area or not. 
1543 + * add and account sit entries of dirty bitmap in sit entry 1544 + * set temporarily 1614 1545 */ 1615 - flushed = flush_sits_in_journal(sbi); 1546 + add_sits_in_set(sbi); 1616 1547 1617 - for_each_set_bit(segno, bitmap, nsegs) { 1618 - struct seg_entry *se = get_seg_entry(sbi, segno); 1619 - int sit_offset, offset; 1548 + /* 1549 + * if there are no enough space in journal to store dirty sit 1550 + * entries, remove all entries from journal and add and account 1551 + * them in sit entry set. 1552 + */ 1553 + if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) 1554 + remove_sits_in_journal(sbi); 1620 1555 1621 - sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 1556 + if (!sit_i->dirty_sentries) 1557 + goto out; 1622 1558 1623 - /* add discard candidates */ 1624 - if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) 1625 - add_discard_addrs(sbi, segno, se); 1559 + /* 1560 + * there are two steps to flush sit entries: 1561 + * #1, flush sit entries to journal in current cold data summary block. 1562 + * #2, flush sit entries to sit page. 
1563 + */ 1564 + list_for_each_entry_safe(ses, tmp, head, set_list) { 1565 + struct page *page; 1566 + struct f2fs_sit_block *raw_sit = NULL; 1567 + unsigned int start_segno = ses->start_segno; 1568 + unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, 1569 + (unsigned long)MAIN_SEGS(sbi)); 1570 + unsigned int segno = start_segno; 1626 1571 1627 - if (flushed) 1628 - goto to_sit_page; 1572 + if (to_journal && 1573 + !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) 1574 + to_journal = false; 1629 1575 1630 - offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); 1631 - if (offset >= 0) { 1632 - segno_in_journal(sum, offset) = cpu_to_le32(segno); 1633 - seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); 1634 - goto flush_done; 1635 - } 1636 - to_sit_page: 1637 - if (!page || (start > segno) || (segno > end)) { 1638 - if (page) { 1639 - f2fs_put_page(page, 1); 1640 - page = NULL; 1641 - } 1642 - 1643 - start = START_SEGNO(sit_i, segno); 1644 - end = start + SIT_ENTRY_PER_BLOCK - 1; 1645 - 1646 - /* read sit block that will be updated */ 1647 - page = get_next_sit_page(sbi, start); 1576 + if (!to_journal) { 1577 + page = get_next_sit_page(sbi, start_segno); 1648 1578 raw_sit = page_address(page); 1649 1579 } 1650 1580 1651 - /* udpate entry in SIT block */ 1652 - seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); 1653 - flush_done: 1654 - __clear_bit(segno, bitmap); 1655 - sit_i->dirty_sentries--; 1581 + /* flush dirty sit entries in region of current sit set */ 1582 + for_each_set_bit_from(segno, bitmap, end) { 1583 + int offset, sit_offset; 1584 + 1585 + se = get_seg_entry(sbi, segno); 1586 + 1587 + /* add discard candidates */ 1588 + if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { 1589 + cpc->trim_start = segno; 1590 + add_discard_addrs(sbi, cpc); 1591 + } 1592 + 1593 + if (to_journal) { 1594 + offset = lookup_journal_in_cursum(sum, 1595 + SIT_JOURNAL, segno, 1); 1596 + f2fs_bug_on(sbi, offset < 0); 1597 + segno_in_journal(sum, 
offset) = 1598 + cpu_to_le32(segno); 1599 + seg_info_to_raw_sit(se, 1600 + &sit_in_journal(sum, offset)); 1601 + } else { 1602 + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 1603 + seg_info_to_raw_sit(se, 1604 + &raw_sit->entries[sit_offset]); 1605 + } 1606 + 1607 + __clear_bit(segno, bitmap); 1608 + sit_i->dirty_sentries--; 1609 + ses->entry_cnt--; 1610 + } 1611 + 1612 + if (!to_journal) 1613 + f2fs_put_page(page, 1); 1614 + 1615 + f2fs_bug_on(sbi, ses->entry_cnt); 1616 + release_sit_entry_set(ses); 1617 + } 1618 + 1619 + f2fs_bug_on(sbi, !list_empty(head)); 1620 + f2fs_bug_on(sbi, sit_i->dirty_sentries); 1621 + out: 1622 + if (cpc->reason == CP_DISCARD) { 1623 + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) 1624 + add_discard_addrs(sbi, cpc); 1656 1625 } 1657 1626 mutex_unlock(&sit_i->sentry_lock); 1658 1627 mutex_unlock(&curseg->curseg_mutex); 1659 - 1660 - /* writeout last modified SIT block */ 1661 - f2fs_put_page(page, 1); 1662 1628 1663 1629 set_prefree_as_free_segments(sbi); 1664 1630 } ··· 1770 1554 1771 1555 SM_I(sbi)->sit_info = sit_i; 1772 1556 1773 - sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); 1557 + sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry)); 1774 1558 if (!sit_i->sentries) 1775 1559 return -ENOMEM; 1776 1560 1777 - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1561 + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 1778 1562 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); 1779 1563 if (!sit_i->dirty_sentries_bitmap) 1780 1564 return -ENOMEM; 1781 1565 1782 - for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1566 + for (start = 0; start < MAIN_SEGS(sbi); start++) { 1783 1567 sit_i->sentries[start].cur_valid_map 1784 1568 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); 1785 1569 sit_i->sentries[start].ckpt_valid_map ··· 1790 1574 } 1791 1575 1792 1576 if (sbi->segs_per_sec > 1) { 1793 - sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * 1577 + sit_i->sec_entries = 
vzalloc(MAIN_SECS(sbi) * 1794 1578 sizeof(struct sec_entry)); 1795 1579 if (!sit_i->sec_entries) 1796 1580 return -ENOMEM; ··· 1825 1609 1826 1610 static int build_free_segmap(struct f2fs_sb_info *sbi) 1827 1611 { 1828 - struct f2fs_sm_info *sm_info = SM_I(sbi); 1829 1612 struct free_segmap_info *free_i; 1830 1613 unsigned int bitmap_size, sec_bitmap_size; 1831 1614 ··· 1835 1620 1836 1621 SM_I(sbi)->free_info = free_i; 1837 1622 1838 - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1623 + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 1839 1624 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); 1840 1625 if (!free_i->free_segmap) 1841 1626 return -ENOMEM; 1842 1627 1843 - sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); 1628 + sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 1844 1629 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); 1845 1630 if (!free_i->free_secmap) 1846 1631 return -ENOMEM; ··· 1850 1635 memset(free_i->free_secmap, 0xff, sec_bitmap_size); 1851 1636 1852 1637 /* init free segmap information */ 1853 - free_i->start_segno = 1854 - (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); 1638 + free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); 1855 1639 free_i->free_segments = 0; 1856 1640 free_i->free_sections = 0; 1857 1641 rwlock_init(&free_i->segmap_lock); ··· 1887 1673 int sit_blk_cnt = SIT_BLK_CNT(sbi); 1888 1674 unsigned int i, start, end; 1889 1675 unsigned int readed, start_blk = 0; 1890 - int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1676 + int nrpages = MAX_BIO_BLOCKS(sbi); 1891 1677 1892 1678 do { 1893 1679 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); ··· 1895 1681 start = start_blk * sit_i->sents_per_block; 1896 1682 end = (start_blk + readed) * sit_i->sents_per_block; 1897 1683 1898 - for (; start < end && start < TOTAL_SEGS(sbi); start++) { 1684 + for (; start < end && start < MAIN_SEGS(sbi); start++) { 1899 1685 struct seg_entry *se = &sit_i->sentries[start]; 1900 
1686 struct f2fs_sit_block *sit_blk; 1901 1687 struct f2fs_sit_entry sit; ··· 1933 1719 unsigned int start; 1934 1720 int type; 1935 1721 1936 - for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1722 + for (start = 0; start < MAIN_SEGS(sbi); start++) { 1937 1723 struct seg_entry *sentry = get_seg_entry(sbi, start); 1938 1724 if (!sentry->valid_blocks) 1939 1725 __set_free(sbi, start); ··· 1950 1736 { 1951 1737 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1952 1738 struct free_segmap_info *free_i = FREE_I(sbi); 1953 - unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); 1739 + unsigned int segno = 0, offset = 0; 1954 1740 unsigned short valid_blocks; 1955 1741 1956 1742 while (1) { 1957 1743 /* find dirty segment based on free segmap */ 1958 - segno = find_next_inuse(free_i, total_segs, offset); 1959 - if (segno >= total_segs) 1744 + segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); 1745 + if (segno >= MAIN_SEGS(sbi)) 1960 1746 break; 1961 1747 offset = segno + 1; 1962 1748 valid_blocks = get_valid_blocks(sbi, segno, 0); 1963 - if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) 1749 + if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) 1964 1750 continue; 1751 + if (valid_blocks > sbi->blocks_per_seg) { 1752 + f2fs_bug_on(sbi, 1); 1753 + continue; 1754 + } 1965 1755 mutex_lock(&dirty_i->seglist_lock); 1966 1756 __locate_dirty_segment(sbi, segno, DIRTY); 1967 1757 mutex_unlock(&dirty_i->seglist_lock); ··· 1975 1757 static int init_victim_secmap(struct f2fs_sb_info *sbi) 1976 1758 { 1977 1759 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1978 - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); 1760 + unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 1979 1761 1980 1762 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); 1981 1763 if (!dirty_i->victim_secmap) ··· 1996 1778 SM_I(sbi)->dirty_info = dirty_i; 1997 1779 mutex_init(&dirty_i->seglist_lock); 1998 1780 1999 - bitmap_size = 
f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1781 + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 2000 1782 2001 1783 for (i = 0; i < NR_DIRTY_TYPE; i++) { 2002 1784 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); ··· 2020 1802 2021 1803 sit_i->min_mtime = LLONG_MAX; 2022 1804 2023 - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 1805 + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 2024 1806 unsigned int i; 2025 1807 unsigned long long mtime = 0; 2026 1808 ··· 2058 1840 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 2059 1841 sm_info->rec_prefree_segments = sm_info->main_segments * 2060 1842 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 2061 - sm_info->ipu_policy = F2FS_IPU_DISABLE; 1843 + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; 2062 1844 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 1845 + sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 2063 1846 2064 1847 INIT_LIST_HEAD(&sm_info->discard_list); 2065 1848 sm_info->nr_discards = 0; 2066 1849 sm_info->max_discards = 0; 1850 + 1851 + INIT_LIST_HEAD(&sm_info->sit_entry_set); 2067 1852 2068 1853 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { 2069 1854 err = create_flush_cmd_control(sbi); ··· 2163 1942 return; 2164 1943 2165 1944 if (sit_i->sentries) { 2166 - for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1945 + for (start = 0; start < MAIN_SEGS(sbi); start++) { 2167 1946 kfree(sit_i->sentries[start].cur_valid_map); 2168 1947 kfree(sit_i->sentries[start].ckpt_valid_map); 2169 1948 } ··· 2197 1976 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 2198 1977 sizeof(struct discard_entry)); 2199 1978 if (!discard_entry_slab) 2200 - return -ENOMEM; 1979 + goto fail; 1980 + 1981 + sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", 1982 + sizeof(struct nat_entry_set)); 1983 + if (!sit_entry_set_slab) 1984 + goto destory_discard_entry; 1985 + 1986 + inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", 1987 + sizeof(struct 
inmem_pages)); 1988 + if (!inmem_entry_slab) 1989 + goto destroy_sit_entry_set; 2201 1990 return 0; 1991 + 1992 + destroy_sit_entry_set: 1993 + kmem_cache_destroy(sit_entry_set_slab); 1994 + destory_discard_entry: 1995 + kmem_cache_destroy(discard_entry_slab); 1996 + fail: 1997 + return -ENOMEM; 2202 1998 } 2203 1999 2204 2000 void destroy_segment_manager_caches(void) 2205 2001 { 2002 + kmem_cache_destroy(sit_entry_set_slab); 2206 2003 kmem_cache_destroy(discard_entry_slab); 2004 + kmem_cache_destroy(inmem_entry_slab); 2207 2005 }
+98 -62
fs/f2fs/segment.h
··· 45 45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ 46 46 sbi->segs_per_sec)) \ 47 47 48 - #define START_BLOCK(sbi, segno) \ 49 - (SM_I(sbi)->seg0_blkaddr + \ 48 + #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) 49 + #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) 50 + 51 + #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) 52 + #define MAIN_SECS(sbi) (sbi->total_sections) 53 + 54 + #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) 55 + #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) 56 + 57 + #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) 58 + #define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ 59 + sbi->log_blocks_per_seg)) 60 + 61 + #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ 50 62 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) 63 + 51 64 #define NEXT_FREE_BLKADDR(sbi, curseg) \ 52 65 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) 53 66 54 - #define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) 55 - 56 - #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ 57 - ((blk_addr) - SM_I(sbi)->seg0_blkaddr) 67 + #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) 58 68 #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 59 69 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 60 70 #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ ··· 87 77 88 78 #define SIT_ENTRY_OFFSET(sit_i, segno) \ 89 79 (segno % sit_i->sents_per_block) 90 - #define SIT_BLOCK_OFFSET(sit_i, segno) \ 80 + #define SIT_BLOCK_OFFSET(segno) \ 91 81 (segno / SIT_ENTRY_PER_BLOCK) 92 - #define START_SEGNO(sit_i, segno) \ 93 - (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) 82 + #define START_SEGNO(segno) \ 83 + (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) 94 84 #define SIT_BLK_CNT(sbi) \ 95 - ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) 85 + ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) 96 86 #define f2fs_bitmap_size(nr) \ 97 87 
(BITS_TO_LONGS(nr) * sizeof(unsigned long)) 98 - #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) 99 - #define TOTAL_SECS(sbi) (sbi->total_sections) 100 88 101 - #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ 102 - (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) 103 - #define SECTOR_TO_BLOCK(sbi, sectors) \ 104 - (sectors >> (sbi)->log_sectors_per_block) 105 - #define MAX_BIO_BLOCKS(max_hw_blocks) \ 106 - (min((int)max_hw_blocks, BIO_MAX_PAGES)) 89 + #define SECTOR_FROM_BLOCK(blk_addr) \ 90 + (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) 91 + #define SECTOR_TO_BLOCK(sectors) \ 92 + (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) 93 + #define MAX_BIO_BLOCKS(sbi) \ 94 + ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES)) 107 95 108 96 /* 109 97 * indicate a block allocation direction: RIGHT and LEFT. ··· 175 167 void (*allocate_segment)(struct f2fs_sb_info *, int, bool); 176 168 }; 177 169 170 + struct inmem_pages { 171 + struct list_head list; 172 + struct page *page; 173 + }; 174 + 178 175 struct sit_info { 179 176 const struct segment_allocation *s_ops; 180 177 ··· 248 235 unsigned short next_blkoff; /* next block offset to write */ 249 236 unsigned int zone; /* current zone number */ 250 237 unsigned int next_segno; /* preallocated segment */ 238 + }; 239 + 240 + struct sit_entry_set { 241 + struct list_head set_list; /* link with all sit sets */ 242 + unsigned int start_segno; /* start segno of sits in set */ 243 + unsigned int entry_cnt; /* the # of sit entries in set */ 251 244 }; 252 245 253 246 /* ··· 335 316 clear_bit(segno, free_i->free_segmap); 336 317 free_i->free_segments++; 337 318 338 - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); 319 + next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); 339 320 if (next >= start_segno + sbi->segs_per_sec) { 340 321 clear_bit(secno, free_i->free_secmap); 341 322 free_i->free_sections++; ··· 449 430 450 431 static inline bool need_SSR(struct f2fs_sb_info *sbi) 451 432 
{ 452 - return (prefree_segments(sbi) / sbi->segs_per_sec) 453 - + free_sections(sbi) < overprovision_sections(sbi); 433 + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); 434 + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); 435 + return free_sections(sbi) <= (node_secs + 2 * dent_secs + 436 + reserved_sections(sbi) + 1); 454 437 } 455 438 456 439 static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) ··· 487 466 * F2FS_IPU_UTIL - if FS utilization is over threashold, 488 467 * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over 489 468 * threashold, 469 + * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash 470 + * storages. IPU will be triggered only if the # of dirty 471 + * pages over min_fsync_blocks. 490 472 * F2FS_IPUT_DISABLE - disable IPU. (=default option) 491 473 */ 492 474 #define DEF_MIN_IPU_UTIL 70 475 + #define DEF_MIN_FSYNC_BLOCKS 8 493 476 494 477 enum { 495 478 F2FS_IPU_FORCE, 496 479 F2FS_IPU_SSR, 497 480 F2FS_IPU_UTIL, 498 481 F2FS_IPU_SSR_UTIL, 499 - F2FS_IPU_DISABLE, 482 + F2FS_IPU_FSYNC, 500 483 }; 501 484 502 485 static inline bool need_inplace_update(struct inode *inode) 503 486 { 504 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 487 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 488 + unsigned int policy = SM_I(sbi)->ipu_policy; 505 489 506 490 /* IPU can be done only for the user data */ 507 - if (S_ISDIR(inode->i_mode)) 491 + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) 508 492 return false; 509 493 510 - /* this is only set during fdatasync */ 511 - if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) 494 + if (policy & (0x1 << F2FS_IPU_FORCE)) 495 + return true; 496 + if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) 497 + return true; 498 + if (policy & (0x1 << F2FS_IPU_UTIL) && 499 + utilization(sbi) > SM_I(sbi)->min_ipu_util) 500 + return true; 501 + if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && 502 + 
utilization(sbi) > SM_I(sbi)->min_ipu_util) 512 503 return true; 513 504 514 - switch (SM_I(sbi)->ipu_policy) { 515 - case F2FS_IPU_FORCE: 505 + /* this is only set during fdatasync */ 506 + if (policy & (0x1 << F2FS_IPU_FSYNC) && 507 + is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) 516 508 return true; 517 - case F2FS_IPU_SSR: 518 - if (need_SSR(sbi)) 519 - return true; 520 - break; 521 - case F2FS_IPU_UTIL: 522 - if (utilization(sbi) > SM_I(sbi)->min_ipu_util) 523 - return true; 524 - break; 525 - case F2FS_IPU_SSR_UTIL: 526 - if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) 527 - return true; 528 - break; 529 - case F2FS_IPU_DISABLE: 530 - break; 531 - } 509 + 532 510 return false; 533 511 } 534 512 ··· 554 534 #ifdef CONFIG_F2FS_CHECK_FS 555 535 static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 556 536 { 557 - unsigned int end_segno = SM_I(sbi)->segment_count - 1; 558 - BUG_ON(segno > end_segno); 537 + BUG_ON(segno > TOTAL_SEGS(sbi) - 1); 559 538 } 560 539 561 540 static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 562 541 { 563 - struct f2fs_sm_info *sm_info = SM_I(sbi); 564 - block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; 565 - block_t start_addr = sm_info->seg0_blkaddr; 566 - block_t end_addr = start_addr + total_blks - 1; 567 - BUG_ON(blk_addr < start_addr); 568 - BUG_ON(blk_addr > end_addr); 542 + BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); 543 + BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); 569 544 } 570 545 571 546 /* ··· 569 554 static inline void check_block_count(struct f2fs_sb_info *sbi, 570 555 int segno, struct f2fs_sit_entry *raw_sit) 571 556 { 572 - struct f2fs_sm_info *sm_info = SM_I(sbi); 573 - unsigned int end_segno = sm_info->segment_count - 1; 574 557 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? 
true : false; 575 558 int valid_blocks = 0; 576 559 int cur_pos = 0, next_pos; ··· 577 564 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); 578 565 579 566 /* check boundary of a given segment number */ 580 - BUG_ON(segno > end_segno); 567 + BUG_ON(segno > TOTAL_SEGS(sbi) - 1); 581 568 582 569 /* check bitmap with valid block count */ 583 570 do { ··· 596 583 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); 597 584 } 598 585 #else 599 - #define check_seg_range(sbi, segno) 600 - #define verify_block_addr(sbi, blk_addr) 601 - #define check_block_count(sbi, segno, raw_sit) 586 + static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 587 + { 588 + if (segno > TOTAL_SEGS(sbi) - 1) 589 + sbi->need_fsck = true; 590 + } 591 + 592 + static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 593 + { 594 + if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) 595 + sbi->need_fsck = true; 596 + } 597 + 598 + /* 599 + * Summary block is always treated as an invalid block 600 + */ 601 + static inline void check_block_count(struct f2fs_sb_info *sbi, 602 + int segno, struct f2fs_sit_entry *raw_sit) 603 + { 604 + /* check segment usage */ 605 + if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) 606 + sbi->need_fsck = true; 607 + 608 + /* check boundary of a given segment number */ 609 + if (segno > TOTAL_SEGS(sbi) - 1) 610 + sbi->need_fsck = true; 611 + } 602 612 #endif 603 613 604 614 static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, 605 615 unsigned int start) 606 616 { 607 617 struct sit_info *sit_i = SIT_I(sbi); 608 - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); 618 + unsigned int offset = SIT_BLOCK_OFFSET(start); 609 619 block_t blk_addr = sit_i->sit_base_addr + offset; 610 620 611 621 check_seg_range(sbi, start); ··· 655 619 656 620 static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) 657 621 { 658 - unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); 
622 + unsigned int block_off = SIT_BLOCK_OFFSET(start); 659 623 660 624 if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) 661 625 f2fs_clear_bit(block_off, sit_i->sit_bitmap); ··· 702 666 { 703 667 struct block_device *bdev = sbi->sb->s_bdev; 704 668 struct request_queue *q = bdev_get_queue(bdev); 705 - return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 669 + return SECTOR_TO_BLOCK(queue_max_sectors(q)); 706 670 } 707 671 708 672 /* ··· 719 683 else if (type == NODE) 720 684 return 3 * sbi->blocks_per_seg; 721 685 else if (type == META) 722 - return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 686 + return MAX_BIO_BLOCKS(sbi); 723 687 else 724 688 return 0; 725 689 } ··· 742 706 else if (type == NODE) 743 707 desired = 3 * max_hw_blocks(sbi); 744 708 else 745 - desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 709 + desired = MAX_BIO_BLOCKS(sbi); 746 710 747 711 wbc->nr_to_write = desired; 748 712 return desired - nr_to_write;
+37 -10
fs/f2fs/super.c
··· 190 190 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 191 191 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 192 192 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 193 + F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); 193 194 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); 194 195 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 195 196 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); ··· 205 204 ATTR_LIST(max_small_discards), 206 205 ATTR_LIST(ipu_policy), 207 206 ATTR_LIST(min_ipu_util), 207 + ATTR_LIST(min_fsync_blocks), 208 208 ATTR_LIST(max_victim_search), 209 209 ATTR_LIST(dir_level), 210 210 ATTR_LIST(ram_thresh), ··· 368 366 369 367 /* Initialize f2fs-specific inode info */ 370 368 fi->vfs_inode.i_version = 1; 371 - atomic_set(&fi->dirty_dents, 0); 369 + atomic_set(&fi->dirty_pages, 0); 372 370 fi->i_current_depth = 1; 373 371 fi->i_advise = 0; 374 372 rwlock_init(&fi->ext.ext_lock); 375 373 init_rwsem(&fi->i_sem); 374 + INIT_LIST_HEAD(&fi->inmem_pages); 375 + mutex_init(&fi->inmem_lock); 376 376 377 377 set_inode_flag(fi, FI_NEW_INODE); 378 378 ··· 436 432 stop_gc_thread(sbi); 437 433 438 434 /* We don't need to do checkpoint when it's clean */ 439 - if (sbi->s_dirty) 440 - write_checkpoint(sbi, true); 435 + if (sbi->s_dirty) { 436 + struct cp_control cpc = { 437 + .reason = CP_UMOUNT, 438 + }; 439 + write_checkpoint(sbi, &cpc); 440 + } 441 441 442 442 /* 443 443 * normally superblock is clean, so we need to release this. 444 444 * In addition, EIO will skip do checkpoint, we need this as well. 
445 445 */ 446 446 release_dirty_inode(sbi); 447 + release_discard_addrs(sbi); 447 448 448 449 iput(sbi->node_inode); 449 450 iput(sbi->meta_inode); ··· 473 464 trace_f2fs_sync_fs(sb, sync); 474 465 475 466 if (sync) { 467 + struct cp_control cpc = { 468 + .reason = CP_SYNC, 469 + }; 476 470 mutex_lock(&sbi->gc_mutex); 477 - write_checkpoint(sbi, false); 471 + write_checkpoint(sbi, &cpc); 478 472 mutex_unlock(&sbi->gc_mutex); 479 473 } else { 480 474 f2fs_balance_fs(sbi); ··· 627 615 */ 628 616 org_mount_opt = sbi->mount_opt; 629 617 active_logs = sbi->active_logs; 618 + 619 + sbi->mount_opt.opt = 0; 620 + sbi->active_logs = NR_CURSEG_TYPE; 630 621 631 622 /* parse mount options */ 632 623 err = parse_options(sb, data); ··· 801 786 return 1; 802 787 } 803 788 804 - if (le32_to_cpu(raw_super->log_sectorsize) != 805 - F2FS_LOG_SECTOR_SIZE) { 806 - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); 789 + /* Currently, support 512/1024/2048/4096 bytes sector size */ 790 + if (le32_to_cpu(raw_super->log_sectorsize) > 791 + F2FS_MAX_LOG_SECTOR_SIZE || 792 + le32_to_cpu(raw_super->log_sectorsize) < 793 + F2FS_MIN_LOG_SECTOR_SIZE) { 794 + f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", 795 + le32_to_cpu(raw_super->log_sectorsize)); 807 796 return 1; 808 797 } 809 - if (le32_to_cpu(raw_super->log_sectors_per_block) != 810 - F2FS_LOG_SECTORS_PER_BLOCK) { 811 - f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); 798 + if (le32_to_cpu(raw_super->log_sectors_per_block) + 799 + le32_to_cpu(raw_super->log_sectorsize) != 800 + F2FS_MAX_LOG_SECTOR_SIZE) { 801 + f2fs_msg(sb, KERN_INFO, 802 + "Invalid log sectors per block(%u) log sectorsize(%u)", 803 + le32_to_cpu(raw_super->log_sectors_per_block), 804 + le32_to_cpu(raw_super->log_sectorsize)); 812 805 return 1; 813 806 } 814 807 return 0; ··· 872 849 atomic_set(&sbi->nr_pages[i], 0); 873 850 874 851 sbi->dir_level = DEF_DIR_LEVEL; 852 + sbi->need_fsck = false; 875 853 } 876 854 877 855 /* ··· 1105 1081 "%s", sb->s_id); 
1106 1082 if (err) 1107 1083 goto free_proc; 1084 + 1085 + if (!retry) 1086 + sbi->need_fsck = true; 1108 1087 1109 1088 /* recover fsynced data */ 1110 1089 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+4 -4
fs/f2fs/xattr.c
··· 266 266 267 267 static void *read_all_xattrs(struct inode *inode, struct page *ipage) 268 268 { 269 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 270 270 struct f2fs_xattr_header *header; 271 271 size_t size = PAGE_SIZE, inline_size = 0; 272 272 void *txattr_addr; ··· 325 325 static inline int write_all_xattrs(struct inode *inode, __u32 hsize, 326 326 void *txattr_addr, struct page *ipage) 327 327 { 328 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 328 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 329 329 size_t inline_size = 0; 330 330 void *xattr_addr; 331 331 struct page *xpage; ··· 373 373 alloc_nid_failed(sbi, new_nid); 374 374 return PTR_ERR(xpage); 375 375 } 376 - f2fs_bug_on(new_nid); 376 + f2fs_bug_on(sbi, new_nid); 377 377 f2fs_wait_on_page_writeback(xpage, NODE); 378 378 } else { 379 379 struct dnode_of_data dn; ··· 596 596 const void *value, size_t size, 597 597 struct page *ipage, int flags) 598 598 { 599 - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 599 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 600 600 int err; 601 601 602 602 /* this case is only from init_inode_metadata */
+4 -2
include/linux/f2fs_fs.h
··· 15 15 #include <linux/types.h> 16 16 17 17 #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ 18 - #define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ 19 - #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ 18 + #define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ 19 + #define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ 20 + #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ 20 21 #define F2FS_BLKSIZE 4096 /* support only 4KB block */ 21 22 #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ 22 23 #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) ··· 86 85 /* 87 86 * For checkpoint 88 87 */ 88 + #define CP_FSCK_FLAG 0x00000010 89 89 #define CP_ERROR_FLAG 0x00000008 90 90 #define CP_COMPACT_SUM_FLAG 0x00000004 91 91 #define CP_ORPHAN_PRESENT_FLAG 0x00000002
+11 -5
include/trace/events/f2fs.h
··· 69 69 { GC_GREEDY, "Greedy" }, \ 70 70 { GC_CB, "Cost-Benefit" }) 71 71 72 + #define show_cpreason(type) \ 73 + __print_symbolic(type, \ 74 + { CP_UMOUNT, "Umount" }, \ 75 + { CP_SYNC, "Sync" }, \ 76 + { CP_DISCARD, "Discard" }) 77 + 72 78 struct victim_sel_policy; 73 79 74 80 DECLARE_EVENT_CLASS(f2fs__inode, ··· 950 944 951 945 TRACE_EVENT(f2fs_write_checkpoint, 952 946 953 - TP_PROTO(struct super_block *sb, bool is_umount, char *msg), 947 + TP_PROTO(struct super_block *sb, int reason, char *msg), 954 948 955 - TP_ARGS(sb, is_umount, msg), 949 + TP_ARGS(sb, reason, msg), 956 950 957 951 TP_STRUCT__entry( 958 952 __field(dev_t, dev) 959 - __field(bool, is_umount) 953 + __field(int, reason) 960 954 __field(char *, msg) 961 955 ), 962 956 963 957 TP_fast_assign( 964 958 __entry->dev = sb->s_dev; 965 - __entry->is_umount = is_umount; 959 + __entry->reason = reason; 966 960 __entry->msg = msg; 967 961 ), 968 962 969 963 TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", 970 964 show_dev(__entry), 971 - __entry->is_umount ? "clean umount" : "consistency", 965 + show_cpreason(__entry->reason), 972 966 __entry->msg) 973 967 ); 974 968