Merge tag 'for-f2fs-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

+5

Documentation/filesystems/f2fs.txt

··· 126 126 to eliminate redundant command issues. If the underlying 127 127 device handles the cache_flush command relatively slowly, 128 128 recommend to enable this option. 129 + nobarrier This option can be used if underlying storage guarantees 130 + its cached data should be written to the non-volatile area. 131 + If this option is set, no cache_flush commands are issued 132 + but f2fs still guarantees the write ordering of all the 133 + data writes. 129 134 130 135 ================================================================================ 131 136 DEBUGFS ENTRIES

-6

fs/f2fs/acl.c

··· 203 203 size_t size = 0; 204 204 int error; 205 205 206 - if (acl) { 207 - error = posix_acl_valid(acl); 208 - if (error < 0) 209 - return error; 210 - } 211 - 212 206 switch (type) { 213 207 case ACL_TYPE_ACCESS: 214 208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;

+117 -61

fs/f2fs/checkpoint.c

··· 22 22 #include "segment.h" 23 23 #include <trace/events/f2fs.h> 24 24 25 - static struct kmem_cache *orphan_entry_slab; 25 + static struct kmem_cache *ino_entry_slab; 26 26 static struct kmem_cache *inode_entry_slab; 27 27 28 28 /* ··· 282 282 .set_page_dirty = f2fs_set_meta_page_dirty, 283 283 }; 284 284 285 + static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 286 + { 287 + struct ino_entry *e; 288 + retry: 289 + spin_lock(&sbi->ino_lock[type]); 290 + 291 + e = radix_tree_lookup(&sbi->ino_root[type], ino); 292 + if (!e) { 293 + e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); 294 + if (!e) { 295 + spin_unlock(&sbi->ino_lock[type]); 296 + goto retry; 297 + } 298 + if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { 299 + spin_unlock(&sbi->ino_lock[type]); 300 + kmem_cache_free(ino_entry_slab, e); 301 + goto retry; 302 + } 303 + memset(e, 0, sizeof(struct ino_entry)); 304 + e->ino = ino; 305 + 306 + list_add_tail(&e->list, &sbi->ino_list[type]); 307 + } 308 + spin_unlock(&sbi->ino_lock[type]); 309 + } 310 + 311 + static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 312 + { 313 + struct ino_entry *e; 314 + 315 + spin_lock(&sbi->ino_lock[type]); 316 + e = radix_tree_lookup(&sbi->ino_root[type], ino); 317 + if (e) { 318 + list_del(&e->list); 319 + radix_tree_delete(&sbi->ino_root[type], ino); 320 + if (type == ORPHAN_INO) 321 + sbi->n_orphans--; 322 + spin_unlock(&sbi->ino_lock[type]); 323 + kmem_cache_free(ino_entry_slab, e); 324 + return; 325 + } 326 + spin_unlock(&sbi->ino_lock[type]); 327 + } 328 + 329 + void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) 330 + { 331 + /* add new dirty ino entry into list */ 332 + __add_ino_entry(sbi, ino, type); 333 + } 334 + 335 + void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) 336 + { 337 + /* remove dirty ino entry from list */ 338 + __remove_ino_entry(sbi, ino, type); 339 + } 340 + 341 + /* mode should be APPEND_INO or UPDATE_INO */ 
342 + bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) 343 + { 344 + struct ino_entry *e; 345 + spin_lock(&sbi->ino_lock[mode]); 346 + e = radix_tree_lookup(&sbi->ino_root[mode], ino); 347 + spin_unlock(&sbi->ino_lock[mode]); 348 + return e ? true : false; 349 + } 350 + 351 + static void release_dirty_inode(struct f2fs_sb_info *sbi) 352 + { 353 + struct ino_entry *e, *tmp; 354 + int i; 355 + 356 + for (i = APPEND_INO; i <= UPDATE_INO; i++) { 357 + spin_lock(&sbi->ino_lock[i]); 358 + list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { 359 + list_del(&e->list); 360 + radix_tree_delete(&sbi->ino_root[i], e->ino); 361 + kmem_cache_free(ino_entry_slab, e); 362 + } 363 + spin_unlock(&sbi->ino_lock[i]); 364 + } 365 + } 366 + 285 367 int acquire_orphan_inode(struct f2fs_sb_info *sbi) 286 368 { 287 369 int err = 0; 288 370 289 - spin_lock(&sbi->orphan_inode_lock); 371 + spin_lock(&sbi->ino_lock[ORPHAN_INO]); 290 372 if (unlikely(sbi->n_orphans >= sbi->max_orphans)) 291 373 err = -ENOSPC; 292 374 else 293 375 sbi->n_orphans++; 294 - spin_unlock(&sbi->orphan_inode_lock); 376 + spin_unlock(&sbi->ino_lock[ORPHAN_INO]); 295 377 296 378 return err; 297 379 } 298 380 299 381 void release_orphan_inode(struct f2fs_sb_info *sbi) 300 382 { 301 - spin_lock(&sbi->orphan_inode_lock); 383 + spin_lock(&sbi->ino_lock[ORPHAN_INO]); 302 384 f2fs_bug_on(sbi->n_orphans == 0); 303 385 sbi->n_orphans--; 304 - spin_unlock(&sbi->orphan_inode_lock); 386 + spin_unlock(&sbi->ino_lock[ORPHAN_INO]); 305 387 } 306 388 307 389 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 308 390 { 309 - struct list_head *head; 310 - struct orphan_inode_entry *new, *orphan; 311 - 312 - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); 313 - new->ino = ino; 314 - 315 - spin_lock(&sbi->orphan_inode_lock); 316 - head = &sbi->orphan_inode_list; 317 - list_for_each_entry(orphan, head, list) { 318 - if (orphan->ino == ino) { 319 - spin_unlock(&sbi->orphan_inode_lock); 320 - 
kmem_cache_free(orphan_entry_slab, new); 321 - return; 322 - } 323 - 324 - if (orphan->ino > ino) 325 - break; 326 - } 327 - 328 - /* add new orphan entry into list which is sorted by inode number */ 329 - list_add_tail(&new->list, &orphan->list); 330 - spin_unlock(&sbi->orphan_inode_lock); 391 + /* add new orphan ino entry into list */ 392 + __add_ino_entry(sbi, ino, ORPHAN_INO); 331 393 } 332 394 333 395 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 334 396 { 335 - struct list_head *head; 336 - struct orphan_inode_entry *orphan; 337 - 338 - spin_lock(&sbi->orphan_inode_lock); 339 - head = &sbi->orphan_inode_list; 340 - list_for_each_entry(orphan, head, list) { 341 - if (orphan->ino == ino) { 342 - list_del(&orphan->list); 343 - f2fs_bug_on(sbi->n_orphans == 0); 344 - sbi->n_orphans--; 345 - spin_unlock(&sbi->orphan_inode_lock); 346 - kmem_cache_free(orphan_entry_slab, orphan); 347 - return; 348 - } 349 - } 350 - spin_unlock(&sbi->orphan_inode_lock); 397 + /* remove orphan entry from orphan list */ 398 + __remove_ino_entry(sbi, ino, ORPHAN_INO); 351 399 } 352 400 353 401 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) ··· 449 401 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 450 402 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 451 403 struct page *page = NULL; 452 - struct orphan_inode_entry *orphan = NULL; 404 + struct ino_entry *orphan = NULL; 453 405 454 406 for (index = 0; index < orphan_blocks; index++) 455 407 grab_meta_page(sbi, start_blk + index); 456 408 457 409 index = 1; 458 - spin_lock(&sbi->orphan_inode_lock); 459 - head = &sbi->orphan_inode_list; 410 + spin_lock(&sbi->ino_lock[ORPHAN_INO]); 411 + head = &sbi->ino_list[ORPHAN_INO]; 460 412 461 413 /* loop for each orphan inode entry and write them in Jornal block */ 462 414 list_for_each_entry(orphan, head, list) { ··· 496 448 f2fs_put_page(page, 1); 497 449 } 498 450 499 - spin_unlock(&sbi->orphan_inode_lock); 451 + 
spin_unlock(&sbi->ino_lock[ORPHAN_INO]); 500 452 } 501 453 502 454 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, ··· 762 714 * until finishing nat/sit flush. 763 715 */ 764 716 retry_flush_nodes: 765 - mutex_lock(&sbi->node_write); 717 + down_write(&sbi->node_write); 766 718 767 719 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 768 - mutex_unlock(&sbi->node_write); 720 + up_write(&sbi->node_write); 769 721 sync_node_pages(sbi, 0, &wbc); 770 722 goto retry_flush_nodes; 771 723 } ··· 774 726 775 727 static void unblock_operations(struct f2fs_sb_info *sbi) 776 728 { 777 - mutex_unlock(&sbi->node_write); 729 + up_write(&sbi->node_write); 778 730 f2fs_unlock_all(sbi); 779 731 } 780 732 ··· 796 748 static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 797 749 { 798 750 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 751 + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 799 752 nid_t last_nid = 0; 800 753 block_t start_blk; 801 754 struct page *cp_page; ··· 810 761 * This avoids to conduct wrong roll-forward operations and uses 811 762 * metapages, so should be called prior to sync_meta_pages below. 
812 763 */ 813 - discard_next_dnode(sbi); 764 + discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); 814 765 815 766 /* Flush all the NAT/SIT pages */ 816 767 while (get_pages(sbi, F2FS_DIRTY_META)) ··· 934 885 /* Here, we only have one bio having CP pack */ 935 886 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 936 887 937 - if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 888 + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { 938 889 clear_prefree_segments(sbi); 890 + release_dirty_inode(sbi); 939 891 F2FS_RESET_SB_DIRT(sbi); 940 892 } 941 893 } ··· 982 932 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 983 933 } 984 934 985 - void init_orphan_info(struct f2fs_sb_info *sbi) 935 + void init_ino_entry_info(struct f2fs_sb_info *sbi) 986 936 { 987 - spin_lock_init(&sbi->orphan_inode_lock); 988 - INIT_LIST_HEAD(&sbi->orphan_inode_list); 989 - sbi->n_orphans = 0; 937 + int i; 938 + 939 + for (i = 0; i < MAX_INO_ENTRY; i++) { 940 + INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); 941 + spin_lock_init(&sbi->ino_lock[i]); 942 + INIT_LIST_HEAD(&sbi->ino_list[i]); 943 + } 944 + 990 945 /* 991 946 * considering 512 blocks in a segment 8 blocks are needed for cp 992 947 * and log segment summaries. 
Remaining blocks are used to keep 993 948 * orphan entries with the limitation one reserved segment 994 949 * for cp pack we can have max 1020*504 orphan entries 995 950 */ 951 + sbi->n_orphans = 0; 996 952 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 997 953 * F2FS_ORPHANS_PER_BLOCK; 998 954 } 999 955 1000 956 int __init create_checkpoint_caches(void) 1001 957 { 1002 - orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 1003 - sizeof(struct orphan_inode_entry)); 1004 - if (!orphan_entry_slab) 958 + ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", 959 + sizeof(struct ino_entry)); 960 + if (!ino_entry_slab) 1005 961 return -ENOMEM; 1006 962 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 1007 963 sizeof(struct dir_inode_entry)); 1008 964 if (!inode_entry_slab) { 1009 - kmem_cache_destroy(orphan_entry_slab); 965 + kmem_cache_destroy(ino_entry_slab); 1010 966 return -ENOMEM; 1011 967 } 1012 968 return 0; ··· 1020 964 1021 965 void destroy_checkpoint_caches(void) 1022 966 { 1023 - kmem_cache_destroy(orphan_entry_slab); 967 + kmem_cache_destroy(ino_entry_slab); 1024 968 kmem_cache_destroy(inode_entry_slab); 1025 969 }

+45 -14

fs/f2fs/data.c

··· 139 139 /* change META to META_FLUSH in the checkpoint procedure */ 140 140 if (type >= META_FLUSH) { 141 141 io->fio.type = META_FLUSH; 142 - io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 + if (test_opt(sbi, NOBARRIER)) 143 + io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO; 144 + else 145 + io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 143 146 } 144 147 __submit_merged_bio(io); 145 148 up_write(&io->io_rwsem); ··· 629 626 if (check_extent_cache(inode, pgofs, bh_result)) 630 627 goto out; 631 628 632 - if (create) 629 + if (create) { 630 + f2fs_balance_fs(sbi); 633 631 f2fs_lock_op(sbi); 632 + } 634 633 635 634 /* When reading holes, we need its node page */ 636 635 set_new_dnode(&dn, inode, NULL, NULL, 0); ··· 789 784 !is_cold_data(page) && 790 785 need_inplace_update(inode))) { 791 786 rewrite_data_page(page, old_blkaddr, fio); 787 + set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 792 788 } else { 793 789 write_data_page(page, &dn, &new_blkaddr, fio); 794 790 update_extent_cache(new_blkaddr, &dn); 791 + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 795 792 } 796 793 out_writepage: 797 794 f2fs_put_dnode(&dn); ··· 921 914 return 0; 922 915 } 923 916 917 + static void f2fs_write_failed(struct address_space *mapping, loff_t to) 918 + { 919 + struct inode *inode = mapping->host; 920 + 921 + if (to > inode->i_size) { 922 + truncate_pagecache(inode, inode->i_size); 923 + truncate_blocks(inode, inode->i_size); 924 + } 925 + } 926 + 924 927 static int f2fs_write_begin(struct file *file, struct address_space *mapping, 925 928 loff_t pos, unsigned len, unsigned flags, 926 929 struct page **pagep, void **fsdata) ··· 948 931 repeat: 949 932 err = f2fs_convert_inline_data(inode, pos + len); 950 933 if (err) 951 - return err; 934 + goto fail; 952 935 953 936 page = grab_cache_page_write_begin(mapping, index, flags); 954 - if (!page) 955 - return -ENOMEM; 937 + if (!page) { 938 + err = -ENOMEM; 939 + goto fail; 940 + } 956 941 957 942 /* to avoid latency 
during memory pressure */ 958 943 unlock_page(page); ··· 968 949 set_new_dnode(&dn, inode, NULL, NULL, 0); 969 950 err = f2fs_reserve_block(&dn, index); 970 951 f2fs_unlock_op(sbi); 971 - 972 952 if (err) { 973 953 f2fs_put_page(page, 0); 974 - return err; 954 + goto fail; 975 955 } 976 956 inline_data: 977 957 lock_page(page); ··· 1000 982 err = f2fs_read_inline_data(inode, page); 1001 983 if (err) { 1002 984 page_cache_release(page); 1003 - return err; 985 + goto fail; 1004 986 } 1005 987 } else { 1006 988 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 1007 989 READ_SYNC); 1008 990 if (err) 1009 - return err; 991 + goto fail; 1010 992 } 1011 993 1012 994 lock_page(page); 1013 995 if (unlikely(!PageUptodate(page))) { 1014 996 f2fs_put_page(page, 1); 1015 - return -EIO; 997 + err = -EIO; 998 + goto fail; 1016 999 } 1017 1000 if (unlikely(page->mapping != mapping)) { 1018 1001 f2fs_put_page(page, 1); ··· 1024 1005 SetPageUptodate(page); 1025 1006 clear_cold_data(page); 1026 1007 return 0; 1008 + fail: 1009 + f2fs_write_failed(mapping, pos + len); 1010 + return err; 1027 1011 } 1028 1012 1029 1013 static int f2fs_write_end(struct file *file, ··· 1038 1016 1039 1017 trace_f2fs_write_end(inode, pos, len, copied); 1040 1018 1041 - SetPageUptodate(page); 1042 1019 set_page_dirty(page); 1043 1020 1044 1021 if (pos + copied > i_size_read(inode)) { ··· 1071 1050 struct iov_iter *iter, loff_t offset) 1072 1051 { 1073 1052 struct file *file = iocb->ki_filp; 1074 - struct inode *inode = file->f_mapping->host; 1053 + struct address_space *mapping = file->f_mapping; 1054 + struct inode *inode = mapping->host; 1055 + size_t count = iov_iter_count(iter); 1056 + int err; 1075 1057 1076 1058 /* Let buffer I/O handle the inline data case. 
*/ 1077 1059 if (f2fs_has_inline_data(inode)) ··· 1086 1062 /* clear fsync mark to recover these blocks */ 1087 1063 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1088 1064 1089 - return blockdev_direct_IO(rw, iocb, inode, iter, offset, 1090 - get_data_block); 1065 + trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1066 + 1067 + err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); 1068 + if (err < 0 && (rw & WRITE)) 1069 + f2fs_write_failed(mapping, offset + count); 1070 + 1071 + trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); 1072 + 1073 + return err; 1091 1074 } 1092 1075 1093 1076 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,

+6 -13

fs/f2fs/debug.c

··· 167 167 si->cache_mem += npages << PAGE_CACHE_SHIFT; 168 168 npages = META_MAPPING(sbi)->nrpages; 169 169 si->cache_mem += npages << PAGE_CACHE_SHIFT; 170 - si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); 170 + si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); 171 171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); 172 172 } 173 173 ··· 345 345 346 346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); 347 347 if (!f2fs_debugfs_root) 348 - goto bail; 348 + return; 349 349 350 350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, 351 351 NULL, &stat_fops); 352 - if (!file) 353 - goto free_debugfs_dir; 354 - 355 - return; 356 - 357 - free_debugfs_dir: 358 - debugfs_remove(f2fs_debugfs_root); 359 - 360 - bail: 361 - f2fs_debugfs_root = NULL; 362 - return; 352 + if (!file) { 353 + debugfs_remove(f2fs_debugfs_root); 354 + f2fs_debugfs_root = NULL; 355 + } 363 356 } 364 357 365 358 void f2fs_destroy_root_stats(void)

+59 -28

fs/f2fs/dir.c

··· 77 77 return bidx; 78 78 } 79 79 80 - static bool early_match_name(const char *name, size_t namelen, 81 - f2fs_hash_t namehash, struct f2fs_dir_entry *de) 80 + static bool early_match_name(size_t namelen, f2fs_hash_t namehash, 81 + struct f2fs_dir_entry *de) 82 82 { 83 83 if (le16_to_cpu(de->name_len) != namelen) 84 84 return false; ··· 90 90 } 91 91 92 92 static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 93 - const char *name, size_t namelen, int *max_slots, 93 + struct qstr *name, int *max_slots, 94 94 f2fs_hash_t namehash, struct page **res_page) 95 95 { 96 96 struct f2fs_dir_entry *de; ··· 109 109 continue; 110 110 } 111 111 de = &dentry_blk->dentry[bit_pos]; 112 - if (early_match_name(name, namelen, namehash, de)) { 112 + if (early_match_name(name->len, namehash, de)) { 113 113 if (!memcmp(dentry_blk->filename[bit_pos], 114 - name, namelen)) { 114 + name->name, 115 + name->len)) { 115 116 *res_page = dentry_page; 116 117 goto found; 117 118 } ··· 121 120 *max_slots = max_len; 122 121 max_len = 0; 123 122 } 123 + 124 + /* 125 + * For the most part, it should be a bug when name_len is zero. 126 + * We stop here for figuring out where the bugs are occurred. 
127 + */ 128 + f2fs_bug_on(!de->name_len); 129 + 124 130 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 125 131 } 126 132 ··· 140 132 } 141 133 142 134 static struct f2fs_dir_entry *find_in_level(struct inode *dir, 143 - unsigned int level, const char *name, size_t namelen, 135 + unsigned int level, struct qstr *name, 144 136 f2fs_hash_t namehash, struct page **res_page) 145 137 { 146 - int s = GET_DENTRY_SLOTS(namelen); 138 + int s = GET_DENTRY_SLOTS(name->len); 147 139 unsigned int nbucket, nblock; 148 140 unsigned int bidx, end_block; 149 141 struct page *dentry_page; ··· 168 160 continue; 169 161 } 170 162 171 - de = find_in_block(dentry_page, name, namelen, 172 - &max_slots, namehash, res_page); 163 + de = find_in_block(dentry_page, name, &max_slots, 164 + namehash, res_page); 173 165 if (de) 174 166 break; 175 167 ··· 195 187 struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, 196 188 struct qstr *child, struct page **res_page) 197 189 { 198 - const char *name = child->name; 199 - size_t namelen = child->len; 200 190 unsigned long npages = dir_blocks(dir); 201 191 struct f2fs_dir_entry *de = NULL; 202 192 f2fs_hash_t name_hash; ··· 206 200 207 201 *res_page = NULL; 208 202 209 - name_hash = f2fs_dentry_hash(name, namelen); 203 + name_hash = f2fs_dentry_hash(child); 210 204 max_depth = F2FS_I(dir)->i_current_depth; 211 205 212 206 for (level = 0; level < max_depth; level++) { 213 - de = find_in_level(dir, level, name, 214 - namelen, name_hash, res_page); 207 + de = find_in_level(dir, level, child, name_hash, res_page); 215 208 if (de) 216 209 break; 217 210 } ··· 303 298 struct page *dentry_page; 304 299 struct f2fs_dentry_block *dentry_blk; 305 300 struct f2fs_dir_entry *de; 306 - void *kaddr; 307 301 308 302 dentry_page = get_new_data_page(inode, page, 0, true); 309 303 if (IS_ERR(dentry_page)) 310 304 return PTR_ERR(dentry_page); 311 305 312 - kaddr = kmap_atomic(dentry_page); 313 - dentry_blk = (struct f2fs_dentry_block *)kaddr; 306 + 307 + 
dentry_blk = kmap_atomic(dentry_page); 314 308 315 309 de = &dentry_blk->dentry[0]; 316 310 de->name_len = cpu_to_le16(1); ··· 327 323 328 324 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); 329 325 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); 330 - kunmap_atomic(kaddr); 326 + kunmap_atomic(dentry_blk); 331 327 332 328 set_page_dirty(dentry_page); 333 329 f2fs_put_page(dentry_page, 1); ··· 337 333 static struct page *init_inode_metadata(struct inode *inode, 338 334 struct inode *dir, const struct qstr *name) 339 335 { 336 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 340 337 struct page *page; 341 338 int err; 342 339 343 340 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 344 - page = new_inode_page(inode, name); 341 + page = new_inode_page(inode); 345 342 if (IS_ERR(page)) 346 343 return page; 347 344 ··· 367 362 set_cold_node(inode, page); 368 363 } 369 364 370 - init_dent_inode(name, page); 365 + if (name) 366 + init_dent_inode(name, page); 371 367 372 368 /* 373 369 * This file should be checkpointed during fsync. ··· 376 370 */ 377 371 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 378 372 file_lost_pino(inode); 373 + /* 374 + * If link the tmpfile to alias through linkat path, 375 + * we should remove this inode from orphan list. 
376 + */ 377 + if (inode->i_nlink == 0) 378 + remove_orphan_inode(sbi, inode->i_ino); 379 379 inc_nlink(inode); 380 380 } 381 381 return page; ··· 465 453 int err = 0; 466 454 int i; 467 455 468 - dentry_hash = f2fs_dentry_hash(name->name, name->len); 456 + dentry_hash = f2fs_dentry_hash(name); 469 457 level = 0; 470 458 current_depth = F2FS_I(dir)->i_current_depth; 471 459 if (F2FS_I(dir)->chash == dentry_hash) { ··· 541 529 return err; 542 530 } 543 531 532 + int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) 533 + { 534 + struct page *page; 535 + int err = 0; 536 + 537 + down_write(&F2FS_I(inode)->i_sem); 538 + page = init_inode_metadata(inode, dir, NULL); 539 + if (IS_ERR(page)) { 540 + err = PTR_ERR(page); 541 + goto fail; 542 + } 543 + /* we don't need to mark_inode_dirty now */ 544 + update_inode(inode, page); 545 + f2fs_put_page(page, 1); 546 + 547 + clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); 548 + fail: 549 + up_write(&F2FS_I(inode)->i_sem); 550 + return err; 551 + } 552 + 544 553 /* 545 554 * It only removes the dentry from the dentry page,corresponding name 546 555 * entry in name page does not need to be touched during deletion. 
··· 574 541 struct address_space *mapping = page->mapping; 575 542 struct inode *dir = mapping->host; 576 543 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 577 - void *kaddr = page_address(page); 578 544 int i; 579 545 580 546 lock_page(page); 581 547 f2fs_wait_on_page_writeback(page, DATA); 582 548 583 - dentry_blk = (struct f2fs_dentry_block *)kaddr; 584 - bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 549 + dentry_blk = page_address(page); 550 + bit_pos = dentry - dentry_blk->dentry; 585 551 for (i = 0; i < slots; i++) 586 552 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); 587 553 ··· 635 603 unsigned long nblock = dir_blocks(dir); 636 604 637 605 for (bidx = 0; bidx < nblock; bidx++) { 638 - void *kaddr; 639 606 dentry_page = get_lock_data_page(dir, bidx); 640 607 if (IS_ERR(dentry_page)) { 641 608 if (PTR_ERR(dentry_page) == -ENOENT) ··· 643 612 return false; 644 613 } 645 614 646 - kaddr = kmap_atomic(dentry_page); 647 - dentry_blk = (struct f2fs_dentry_block *)kaddr; 615 + 616 + dentry_blk = kmap_atomic(dentry_page); 648 617 if (bidx == 0) 649 618 bit_pos = 2; 650 619 else ··· 652 621 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 653 622 NR_DENTRY_IN_BLOCK, 654 623 bit_pos); 655 - kunmap_atomic(kaddr); 624 + kunmap_atomic(dentry_blk); 656 625 657 626 f2fs_put_page(dentry_page, 1); 658 627

+36 -14

fs/f2fs/f2fs.h

··· 41 41 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 42 42 #define F2FS_MOUNT_INLINE_DATA 0x00000100 43 43 #define F2FS_MOUNT_FLUSH_MERGE 0x00000200 44 + #define F2FS_MOUNT_NOBARRIER 0x00000400 44 45 45 46 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 46 47 #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) ··· 100 99 META_SSA 101 100 }; 102 101 103 - /* for the list of orphan inodes */ 104 - struct orphan_inode_entry { 102 + /* for the list of ino */ 103 + enum { 104 + ORPHAN_INO, /* for orphan ino list */ 105 + APPEND_INO, /* for append ino list */ 106 + UPDATE_INO, /* for update ino list */ 107 + MAX_INO_ENTRY, /* max. list */ 108 + }; 109 + 110 + struct ino_entry { 105 111 struct list_head list; /* list head */ 106 112 nid_t ino; /* inode number */ 107 113 }; ··· 264 256 unsigned int nat_cnt; /* the # of cached nat entries */ 265 257 struct list_head nat_entries; /* cached nat entry list (clean) */ 266 258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 259 + struct list_head nat_entry_set; /* nat entry set list */ 260 + unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 267 261 268 262 /* free node ids management */ 269 263 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ ··· 452 442 struct inode *meta_inode; /* cache meta blocks */ 453 443 struct mutex cp_mutex; /* checkpoint procedure lock */ 454 444 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 455 - struct mutex node_write; /* locking node writes */ 445 + struct rw_semaphore node_write; /* locking node writes */ 456 446 struct mutex writepages; /* mutex for writepages() */ 457 447 bool por_doing; /* recovery is doing or not */ 458 448 wait_queue_head_t cp_wait; 459 449 460 - /* for orphan inode management */ 461 - struct list_head orphan_inode_list; /* orphan inode list */ 462 - spinlock_t orphan_inode_lock; /* for orphan inode list */ 450 + /* for inode management */ 451 + struct 
radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ 452 + spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ 453 + struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ 454 + 455 + /* for orphan inode, use 0'th array */ 463 456 unsigned int n_orphans; /* # of orphan inodes */ 464 457 unsigned int max_orphans; /* max orphan inodes */ 465 458 ··· 781 768 if (flag == NAT_BITMAP) 782 769 return &ckpt->sit_nat_version_bitmap; 783 770 else 784 - return ((unsigned char *)ckpt + F2FS_BLKSIZE); 771 + return (unsigned char *)ckpt + F2FS_BLKSIZE; 785 772 } else { 786 773 offset = (flag == NAT_BITMAP) ? 787 774 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; ··· 996 983 FI_NO_EXTENT, /* not to use the extent cache */ 997 984 FI_INLINE_XATTR, /* used for inline xattr */ 998 985 FI_INLINE_DATA, /* used for inline data*/ 986 + FI_APPEND_WRITE, /* inode has appended data */ 987 + FI_UPDATE_WRITE, /* inode has in-place-update data */ 988 + FI_NEED_IPU, /* used fo ipu for fdatasync */ 999 989 }; 1000 990 1001 991 static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1002 992 { 1003 - set_bit(flag, &fi->flags); 993 + if (!test_bit(flag, &fi->flags)) 994 + set_bit(flag, &fi->flags); 1004 995 } 1005 996 1006 997 static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) ··· 1014 997 1015 998 static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) 1016 999 { 1017 - clear_bit(flag, &fi->flags); 1000 + if (test_bit(flag, &fi->flags)) 1001 + clear_bit(flag, &fi->flags); 1018 1002 } 1019 1003 1020 1004 static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) ··· 1154 1136 int update_dent_inode(struct inode *, const struct qstr *); 1155 1137 int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1156 1138 void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 1139 + int f2fs_do_tmpfile(struct inode *, struct inode *); 1157 1140 int 
f2fs_make_empty(struct inode *, struct inode *); 1158 1141 bool f2fs_empty_dir(struct inode *); 1159 1142 ··· 1174 1155 /* 1175 1156 * hash.c 1176 1157 */ 1177 - f2fs_hash_t f2fs_dentry_hash(const char *, size_t); 1158 + f2fs_hash_t f2fs_dentry_hash(const struct qstr *); 1178 1159 1179 1160 /* 1180 1161 * node.c ··· 1192 1173 int truncate_xattr_node(struct inode *, struct page *); 1193 1174 int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1194 1175 void remove_inode_page(struct inode *); 1195 - struct page *new_inode_page(struct inode *, const struct qstr *); 1176 + struct page *new_inode_page(struct inode *); 1196 1177 struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1197 1178 void ra_node_page(struct f2fs_sb_info *, nid_t); 1198 1179 struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); ··· 1204 1185 void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1205 1186 void recover_node_page(struct f2fs_sb_info *, struct page *, 1206 1187 struct f2fs_summary *, struct node_info *, block_t); 1188 + void recover_inline_xattr(struct inode *, struct page *); 1207 1189 bool recover_xattr_data(struct inode *, struct page *, block_t); 1208 1190 int recover_inode_page(struct f2fs_sb_info *, struct page *); 1209 1191 int restore_node_summary(struct f2fs_sb_info *, unsigned int, ··· 1226 1206 void invalidate_blocks(struct f2fs_sb_info *, block_t); 1227 1207 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1228 1208 void clear_prefree_segments(struct f2fs_sb_info *); 1229 - void discard_next_dnode(struct f2fs_sb_info *); 1209 + void discard_next_dnode(struct f2fs_sb_info *, block_t); 1230 1210 int npages_for_summary_flush(struct f2fs_sb_info *); 1231 1211 void allocate_new_segments(struct f2fs_sb_info *); 1232 1212 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); ··· 1260 1240 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1261 1241 int ra_meta_pages(struct f2fs_sb_info *, int, int, 
int); 1262 1242 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1243 + void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1244 + void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1245 + bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1263 1246 int acquire_orphan_inode(struct f2fs_sb_info *); 1264 1247 void release_orphan_inode(struct f2fs_sb_info *); 1265 1248 void add_orphan_inode(struct f2fs_sb_info *, nid_t); ··· 1274 1251 void remove_dirty_dir_inode(struct inode *); 1275 1252 void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1276 1253 void write_checkpoint(struct f2fs_sb_info *, bool); 1277 - void init_orphan_info(struct f2fs_sb_info *); 1254 + void init_ino_entry_info(struct f2fs_sb_info *); 1278 1255 int __init create_checkpoint_caches(void); 1279 1256 void destroy_checkpoint_caches(void); 1280 1257 ··· 1318 1295 struct f2fs_stat_info { 1319 1296 struct list_head stat_list; 1320 1297 struct f2fs_sb_info *sbi; 1321 - struct mutex stat_lock; 1322 1298 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1323 1299 int main_area_segs, main_area_sections, main_area_zones; 1324 1300 int hit_ext, total_ext;

+37 -8

fs/f2fs/file.c

··· 127 127 return 0; 128 128 129 129 trace_f2fs_sync_file_enter(inode); 130 + 131 + /* if fdatasync is triggered, let's do in-place-update */ 132 + if (datasync) 133 + set_inode_flag(fi, FI_NEED_IPU); 134 + 130 135 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 136 + if (datasync) 137 + clear_inode_flag(fi, FI_NEED_IPU); 131 138 if (ret) { 132 139 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 133 140 return ret; 141 + } 142 + 143 + /* 144 + * if there is no written data, don't waste time to write recovery info. 145 + */ 146 + if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && 147 + !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { 148 + if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || 149 + exist_written_data(sbi, inode->i_ino, UPDATE_INO)) 150 + goto flush_out; 151 + goto out; 134 152 } 135 153 136 154 /* guarantee free sections for fsync */ ··· 206 188 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 207 189 if (ret) 208 190 goto out; 191 + 192 + /* once recovery info is written, don't need to tack this */ 193 + remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); 194 + clear_inode_flag(fi, FI_APPEND_WRITE); 195 + flush_out: 196 + remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 197 + clear_inode_flag(fi, FI_UPDATE_WRITE); 209 198 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 210 199 } 211 200 out: ··· 231 206 232 207 /* find first dirty page index */ 233 208 pagevec_init(&pvec, 0); 234 - nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); 235 - pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; 209 + nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, 210 + PAGECACHE_TAG_DIRTY, 1); 211 + pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX; 236 212 pagevec_release(&pvec); 237 213 return pgofs; 238 214 } ··· 298 272 } 299 273 } 300 274 301 - end_offset = IS_INODE(dn.node_page) ? 
302 - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; 275 + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 303 276 304 277 /* find data/hole in dnode block */ 305 278 for (; dn.ofs_in_node < end_offset; ··· 405 380 return; 406 381 407 382 lock_page(page); 408 - if (unlikely(page->mapping != inode->i_mapping)) { 409 - f2fs_put_page(page, 1); 410 - return; 411 - } 383 + if (unlikely(!PageUptodate(page) || 384 + page->mapping != inode->i_mapping)) 385 + goto out; 386 + 412 387 f2fs_wait_on_page_writeback(page, DATA); 413 388 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 414 389 set_page_dirty(page); 390 + 391 + out: 415 392 f2fs_put_page(page, 1); 416 393 } 417 394 ··· 671 644 loff_t new_size = i_size_read(inode); 672 645 loff_t off_start, off_end; 673 646 int ret = 0; 647 + 648 + f2fs_balance_fs(sbi); 674 649 675 650 ret = inode_newsize_ok(inode, (len + offset)); 676 651 if (ret)

+2 -5

fs/f2fs/gc.c

··· 186 186 static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) 187 187 { 188 188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 189 - unsigned int hint = 0; 190 189 unsigned int secno; 191 190 192 191 /* ··· 193 194 * selected by background GC before. 194 195 * Those segments guarantee they have small valid blocks. 195 196 */ 196 - next: 197 - secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); 198 - if (secno < TOTAL_SECS(sbi)) { 197 + for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { 199 198 if (sec_usage_check(sbi, secno)) 200 - goto next; 199 + continue; 201 200 clear_bit(secno, dirty_i->victim_secmap); 202 201 return secno * sbi->segs_per_sec; 203 202 }

+3 -1

fs/f2fs/hash.c

··· 69 69 *buf++ = pad; 70 70 } 71 71 72 - f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) 72 + f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) 73 73 { 74 74 __u32 hash; 75 75 f2fs_hash_t f2fs_hash; 76 76 const char *p; 77 77 __u32 in[8], buf[4]; 78 + const char *name = name_info->name; 79 + size_t len = name_info->len; 78 80 79 81 if ((len <= 2) && (name[0] == '.') && 80 82 (name[1] == '.' || name[1] == '\0'))

+1

fs/f2fs/inline.c

··· 172 172 stat_inc_inline_inode(inode); 173 173 } 174 174 175 + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 175 176 sync_inode_page(&dn); 176 177 f2fs_put_dnode(&dn); 177 178

+10 -2

fs/f2fs/inode.c

··· 267 267 void f2fs_evict_inode(struct inode *inode) 268 268 { 269 269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 270 + nid_t xnid = F2FS_I(inode)->i_xattr_nid; 270 271 271 272 trace_f2fs_evict_inode(inode); 272 273 truncate_inode_pages_final(&inode->i_data); 273 274 274 275 if (inode->i_ino == F2FS_NODE_INO(sbi) || 275 276 inode->i_ino == F2FS_META_INO(sbi)) 276 - goto no_delete; 277 + goto out_clear; 277 278 278 279 f2fs_bug_on(get_dirty_dents(inode)); 279 280 remove_dirty_dir_inode(inode); ··· 296 295 297 296 sb_end_intwrite(inode->i_sb); 298 297 no_delete: 299 - clear_inode(inode); 300 298 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 299 + if (xnid) 300 + invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 301 + if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) 302 + add_dirty_inode(sbi, inode->i_ino, APPEND_INO); 303 + if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) 304 + add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 305 + out_clear: 306 + clear_inode(inode); 301 307 }

+229 -19

fs/f2fs/namei.c

··· 13 13 #include <linux/pagemap.h> 14 14 #include <linux/sched.h> 15 15 #include <linux/ctype.h> 16 + #include <linux/dcache.h> 16 17 17 18 #include "f2fs.h" 18 19 #include "node.h" ··· 23 22 24 23 static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 25 24 { 26 - struct super_block *sb = dir->i_sb; 27 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 25 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 28 26 nid_t ino; 29 27 struct inode *inode; 30 28 bool nid_free = false; 31 29 int err; 32 30 33 - inode = new_inode(sb); 31 + inode = new_inode(dir->i_sb); 34 32 if (!inode) 35 33 return ERR_PTR(-ENOMEM); 36 34 ··· 102 102 static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 103 103 bool excl) 104 104 { 105 - struct super_block *sb = dir->i_sb; 106 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 105 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 107 106 struct inode *inode; 108 107 nid_t ino = 0; 109 108 int err; ··· 145 146 struct dentry *dentry) 146 147 { 147 148 struct inode *inode = old_dentry->d_inode; 148 - struct super_block *sb = dir->i_sb; 149 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 149 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 150 150 int err; 151 151 152 152 f2fs_balance_fs(sbi); ··· 205 207 206 208 static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 207 209 { 208 - struct super_block *sb = dir->i_sb; 209 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 210 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 210 211 struct inode *inode = dentry->d_inode; 211 212 struct f2fs_dir_entry *de; 212 213 struct page *page; ··· 239 242 static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 240 243 const char *symname) 241 244 { 242 - struct super_block *sb = dir->i_sb; 243 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 245 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 244 246 struct inode *inode; 245 247 size_t symlen = strlen(symname) + 1; 246 248 int err; ··· 326 330 static int f2fs_mknod(struct inode *dir, 
struct dentry *dentry, 327 331 umode_t mode, dev_t rdev) 328 332 { 329 - struct super_block *sb = dir->i_sb; 330 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 333 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 331 334 struct inode *inode; 332 335 int err = 0; 333 336 ··· 364 369 static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 365 370 struct inode *new_dir, struct dentry *new_dentry) 366 371 { 367 - struct super_block *sb = old_dir->i_sb; 368 - struct f2fs_sb_info *sbi = F2FS_SB(sb); 372 + struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); 369 373 struct inode *old_inode = old_dentry->d_inode; 370 374 struct inode *new_inode = new_dentry->d_inode; 371 375 struct page *old_dir_page; ··· 387 393 goto out_old; 388 394 } 389 395 390 - f2fs_lock_op(sbi); 391 - 392 396 if (new_inode) { 393 397 394 398 err = -ENOTEMPTY; ··· 398 406 &new_page); 399 407 if (!new_entry) 400 408 goto out_dir; 409 + 410 + f2fs_lock_op(sbi); 401 411 402 412 err = acquire_orphan_inode(sbi); 403 413 if (err) ··· 429 435 update_inode_page(old_inode); 430 436 update_inode_page(new_inode); 431 437 } else { 438 + f2fs_lock_op(sbi); 439 + 432 440 err = f2fs_add_link(new_dentry, old_inode); 433 - if (err) 441 + if (err) { 442 + f2fs_unlock_op(sbi); 434 443 goto out_dir; 444 + } 435 445 436 446 if (old_dir_entry) { 437 447 inc_nlink(new_dir); ··· 470 472 return 0; 471 473 472 474 put_out_dir: 475 + f2fs_unlock_op(sbi); 473 476 kunmap(new_page); 474 477 f2fs_put_page(new_page, 0); 475 478 out_dir: ··· 478 479 kunmap(old_dir_page); 479 480 f2fs_put_page(old_dir_page, 0); 480 481 } 481 - f2fs_unlock_op(sbi); 482 482 out_old: 483 483 kunmap(old_page); 484 484 f2fs_put_page(old_page, 0); 485 485 out: 486 + return err; 487 + } 488 + 489 + static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, 490 + struct inode *new_dir, struct dentry *new_dentry) 491 + { 492 + struct super_block *sb = old_dir->i_sb; 493 + struct f2fs_sb_info *sbi = F2FS_SB(sb); 494 + struct inode 
*old_inode = old_dentry->d_inode; 495 + struct inode *new_inode = new_dentry->d_inode; 496 + struct page *old_dir_page, *new_dir_page; 497 + struct page *old_page, *new_page; 498 + struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; 499 + struct f2fs_dir_entry *old_entry, *new_entry; 500 + int old_nlink = 0, new_nlink = 0; 501 + int err = -ENOENT; 502 + 503 + f2fs_balance_fs(sbi); 504 + 505 + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 506 + if (!old_entry) 507 + goto out; 508 + 509 + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); 510 + if (!new_entry) 511 + goto out_old; 512 + 513 + /* prepare for updating ".." directory entry info later */ 514 + if (old_dir != new_dir) { 515 + if (S_ISDIR(old_inode->i_mode)) { 516 + err = -EIO; 517 + old_dir_entry = f2fs_parent_dir(old_inode, 518 + &old_dir_page); 519 + if (!old_dir_entry) 520 + goto out_new; 521 + } 522 + 523 + if (S_ISDIR(new_inode->i_mode)) { 524 + err = -EIO; 525 + new_dir_entry = f2fs_parent_dir(new_inode, 526 + &new_dir_page); 527 + if (!new_dir_entry) 528 + goto out_old_dir; 529 + } 530 + } 531 + 532 + /* 533 + * If cross rename between file and directory those are not 534 + * in the same directory, we will inc nlink of file's parent 535 + * later, so we should check upper boundary of its nlink. 536 + */ 537 + if ((!old_dir_entry || !new_dir_entry) && 538 + old_dir_entry != new_dir_entry) { 539 + old_nlink = old_dir_entry ? -1 : 1; 540 + new_nlink = -old_nlink; 541 + err = -EMLINK; 542 + if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || 543 + (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) 544 + goto out_new_dir; 545 + } 546 + 547 + f2fs_lock_op(sbi); 548 + 549 + err = update_dent_inode(old_inode, &new_dentry->d_name); 550 + if (err) 551 + goto out_unlock; 552 + 553 + err = update_dent_inode(new_inode, &old_dentry->d_name); 554 + if (err) 555 + goto out_undo; 556 + 557 + /* update ".." 
directory entry info of old dentry */ 558 + if (old_dir_entry) 559 + f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); 560 + 561 + /* update ".." directory entry info of new dentry */ 562 + if (new_dir_entry) 563 + f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); 564 + 565 + /* update directory entry info of old dir inode */ 566 + f2fs_set_link(old_dir, old_entry, old_page, new_inode); 567 + 568 + down_write(&F2FS_I(old_inode)->i_sem); 569 + file_lost_pino(old_inode); 570 + up_write(&F2FS_I(old_inode)->i_sem); 571 + 572 + update_inode_page(old_inode); 573 + 574 + old_dir->i_ctime = CURRENT_TIME; 575 + if (old_nlink) { 576 + down_write(&F2FS_I(old_dir)->i_sem); 577 + if (old_nlink < 0) 578 + drop_nlink(old_dir); 579 + else 580 + inc_nlink(old_dir); 581 + up_write(&F2FS_I(old_dir)->i_sem); 582 + } 583 + mark_inode_dirty(old_dir); 584 + update_inode_page(old_dir); 585 + 586 + /* update directory entry info of new dir inode */ 587 + f2fs_set_link(new_dir, new_entry, new_page, old_inode); 588 + 589 + down_write(&F2FS_I(new_inode)->i_sem); 590 + file_lost_pino(new_inode); 591 + up_write(&F2FS_I(new_inode)->i_sem); 592 + 593 + update_inode_page(new_inode); 594 + 595 + new_dir->i_ctime = CURRENT_TIME; 596 + if (new_nlink) { 597 + down_write(&F2FS_I(new_dir)->i_sem); 598 + if (new_nlink < 0) 599 + drop_nlink(new_dir); 600 + else 601 + inc_nlink(new_dir); 602 + up_write(&F2FS_I(new_dir)->i_sem); 603 + } 604 + mark_inode_dirty(new_dir); 605 + update_inode_page(new_dir); 606 + 607 + f2fs_unlock_op(sbi); 608 + return 0; 609 + out_undo: 610 + /* Still we may fail to recover name info of f2fs_inode here */ 611 + update_dent_inode(old_inode, &old_dentry->d_name); 612 + out_unlock: 613 + f2fs_unlock_op(sbi); 614 + out_new_dir: 615 + if (new_dir_entry) { 616 + kunmap(new_dir_page); 617 + f2fs_put_page(new_dir_page, 0); 618 + } 619 + out_old_dir: 620 + if (old_dir_entry) { 621 + kunmap(old_dir_page); 622 + f2fs_put_page(old_dir_page, 0); 623 + } 624 + 
out_new: 625 + kunmap(new_page); 626 + f2fs_put_page(new_page, 0); 627 + out_old: 628 + kunmap(old_page); 629 + f2fs_put_page(old_page, 0); 630 + out: 631 + return err; 632 + } 633 + 634 + static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, 635 + struct inode *new_dir, struct dentry *new_dentry, 636 + unsigned int flags) 637 + { 638 + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 639 + return -EINVAL; 640 + 641 + if (flags & RENAME_EXCHANGE) { 642 + return f2fs_cross_rename(old_dir, old_dentry, 643 + new_dir, new_dentry); 644 + } 645 + /* 646 + * VFS has already handled the new dentry existence case, 647 + * here, we just deal with "RENAME_NOREPLACE" as regular rename. 648 + */ 649 + return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry); 650 + } 651 + 652 + static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 653 + { 654 + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 655 + struct inode *inode; 656 + int err; 657 + 658 + inode = f2fs_new_inode(dir, mode); 659 + if (IS_ERR(inode)) 660 + return PTR_ERR(inode); 661 + 662 + inode->i_op = &f2fs_file_inode_operations; 663 + inode->i_fop = &f2fs_file_operations; 664 + inode->i_mapping->a_ops = &f2fs_dblock_aops; 665 + 666 + f2fs_lock_op(sbi); 667 + err = acquire_orphan_inode(sbi); 668 + if (err) 669 + goto out; 670 + 671 + err = f2fs_do_tmpfile(inode, dir); 672 + if (err) 673 + goto release_out; 674 + 675 + /* 676 + * add this non-linked tmpfile to orphan list, in this way we could 677 + * remove all unused data of tmpfile after abnormal power-off. 
678 + */ 679 + add_orphan_inode(sbi, inode->i_ino); 680 + f2fs_unlock_op(sbi); 681 + 682 + alloc_nid_done(sbi, inode->i_ino); 683 + d_tmpfile(dentry, inode); 684 + unlock_new_inode(inode); 685 + return 0; 686 + 687 + release_out: 688 + release_orphan_inode(sbi); 689 + out: 690 + f2fs_unlock_op(sbi); 691 + clear_nlink(inode); 692 + unlock_new_inode(inode); 693 + make_bad_inode(inode); 694 + iput(inode); 695 + alloc_nid_failed(sbi, inode->i_ino); 486 696 return err; 487 697 } 488 698 ··· 705 497 .rmdir = f2fs_rmdir, 706 498 .mknod = f2fs_mknod, 707 499 .rename = f2fs_rename, 500 + .rename2 = f2fs_rename2, 501 + .tmpfile = f2fs_tmpfile, 708 502 .getattr = f2fs_getattr, 709 503 .setattr = f2fs_setattr, 710 504 .get_acl = f2fs_get_acl,

+180 -87

fs/f2fs/node.c

··· 25 25 26 26 static struct kmem_cache *nat_entry_slab; 27 27 static struct kmem_cache *free_nid_slab; 28 + static struct kmem_cache *nat_entry_set_slab; 28 29 29 30 bool available_free_memory(struct f2fs_sb_info *sbi, int type) 30 31 { ··· 91 90 92 91 /* get current nat block page with lock */ 93 92 src_page = get_meta_page(sbi, src_off); 94 - 95 - /* Dirty src_page means that it is already the new target NAT page. */ 96 - if (PageDirty(src_page)) 97 - return src_page; 98 - 99 93 dst_page = grab_meta_page(sbi, dst_off); 94 + f2fs_bug_on(PageDirty(src_page)); 100 95 101 96 src_addr = page_address(src_page); 102 97 dst_addr = page_address(dst_page); ··· 842 845 truncate_node(&dn); 843 846 } 844 847 845 - struct page *new_inode_page(struct inode *inode, const struct qstr *name) 848 + struct page *new_inode_page(struct inode *inode) 846 849 { 847 850 struct dnode_of_data dn; 848 851 ··· 1231 1234 if (wbc->for_reclaim) 1232 1235 goto redirty_out; 1233 1236 1234 - mutex_lock(&sbi->node_write); 1237 + down_read(&sbi->node_write); 1235 1238 set_page_writeback(page); 1236 1239 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1237 1240 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); 1238 1241 dec_page_count(sbi, F2FS_DIRTY_NODES); 1239 - mutex_unlock(&sbi->node_write); 1242 + up_read(&sbi->node_write); 1240 1243 unlock_page(page); 1241 1244 return 0; 1242 1245 ··· 1549 1552 clear_node_page_dirty(page); 1550 1553 } 1551 1554 1552 - static void recover_inline_xattr(struct inode *inode, struct page *page) 1555 + void recover_inline_xattr(struct inode *inode, struct page *page) 1553 1556 { 1554 1557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1555 1558 void *src_addr, *dst_addr; ··· 1587 1590 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1588 1591 nid_t new_xnid = nid_of_node(page); 1589 1592 struct node_info ni; 1590 - 1591 - recover_inline_xattr(inode, page); 1592 1593 1593 1594 if (!f2fs_has_xattr_block(ofs_of_node(page))) 1594 1595 return 
false; ··· 1739 1744 return err; 1740 1745 } 1741 1746 1742 - static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1747 + static struct nat_entry_set *grab_nat_entry_set(void) 1748 + { 1749 + struct nat_entry_set *nes = 1750 + f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); 1751 + 1752 + nes->entry_cnt = 0; 1753 + INIT_LIST_HEAD(&nes->set_list); 1754 + INIT_LIST_HEAD(&nes->entry_list); 1755 + return nes; 1756 + } 1757 + 1758 + static void release_nat_entry_set(struct nat_entry_set *nes, 1759 + struct f2fs_nm_info *nm_i) 1760 + { 1761 + f2fs_bug_on(!list_empty(&nes->entry_list)); 1762 + 1763 + nm_i->dirty_nat_cnt -= nes->entry_cnt; 1764 + list_del(&nes->set_list); 1765 + kmem_cache_free(nat_entry_set_slab, nes); 1766 + } 1767 + 1768 + static void adjust_nat_entry_set(struct nat_entry_set *nes, 1769 + struct list_head *head) 1770 + { 1771 + struct nat_entry_set *next = nes; 1772 + 1773 + if (list_is_last(&nes->set_list, head)) 1774 + return; 1775 + 1776 + list_for_each_entry_continue(next, head, set_list) 1777 + if (nes->entry_cnt <= next->entry_cnt) 1778 + break; 1779 + 1780 + list_move_tail(&nes->set_list, &next->set_list); 1781 + } 1782 + 1783 + static void add_nat_entry(struct nat_entry *ne, struct list_head *head) 1784 + { 1785 + struct nat_entry_set *nes; 1786 + nid_t start_nid = START_NID(ne->ni.nid); 1787 + 1788 + list_for_each_entry(nes, head, set_list) { 1789 + if (nes->start_nid == start_nid) { 1790 + list_move_tail(&ne->list, &nes->entry_list); 1791 + nes->entry_cnt++; 1792 + adjust_nat_entry_set(nes, head); 1793 + return; 1794 + } 1795 + } 1796 + 1797 + nes = grab_nat_entry_set(); 1798 + 1799 + nes->start_nid = start_nid; 1800 + list_move_tail(&ne->list, &nes->entry_list); 1801 + nes->entry_cnt++; 1802 + list_add(&nes->set_list, head); 1803 + } 1804 + 1805 + static void merge_nats_in_set(struct f2fs_sb_info *sbi) 1806 + { 1807 + struct f2fs_nm_info *nm_i = NM_I(sbi); 1808 + struct list_head *dirty_list = &nm_i->dirty_nat_entries; 1809 + 
struct list_head *set_list = &nm_i->nat_entry_set; 1810 + struct nat_entry *ne, *tmp; 1811 + 1812 + write_lock(&nm_i->nat_tree_lock); 1813 + list_for_each_entry_safe(ne, tmp, dirty_list, list) { 1814 + if (nat_get_blkaddr(ne) == NEW_ADDR) 1815 + continue; 1816 + add_nat_entry(ne, set_list); 1817 + nm_i->dirty_nat_cnt++; 1818 + } 1819 + write_unlock(&nm_i->nat_tree_lock); 1820 + } 1821 + 1822 + static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) 1823 + { 1824 + if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) 1825 + return true; 1826 + else 1827 + return false; 1828 + } 1829 + 1830 + static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1743 1831 { 1744 1832 struct f2fs_nm_info *nm_i = NM_I(sbi); 1745 1833 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); ··· 1830 1752 int i; 1831 1753 1832 1754 mutex_lock(&curseg->curseg_mutex); 1833 - 1834 - if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { 1835 - mutex_unlock(&curseg->curseg_mutex); 1836 - return false; 1837 - } 1838 - 1839 1755 for (i = 0; i < nats_in_cursum(sum); i++) { 1840 1756 struct nat_entry *ne; 1841 1757 struct f2fs_nat_entry raw_ne; ··· 1839 1767 retry: 1840 1768 write_lock(&nm_i->nat_tree_lock); 1841 1769 ne = __lookup_nat_cache(nm_i, nid); 1842 - if (ne) { 1843 - __set_nat_cache_dirty(nm_i, ne); 1844 - write_unlock(&nm_i->nat_tree_lock); 1845 - continue; 1846 - } 1770 + if (ne) 1771 + goto found; 1772 + 1847 1773 ne = grab_nat_entry(nm_i, nid); 1848 1774 if (!ne) { 1849 1775 write_unlock(&nm_i->nat_tree_lock); 1850 1776 goto retry; 1851 1777 } 1852 1778 node_info_from_raw_nat(&ne->ni, &raw_ne); 1779 + found: 1853 1780 __set_nat_cache_dirty(nm_i, ne); 1854 1781 write_unlock(&nm_i->nat_tree_lock); 1855 1782 } 1856 1783 update_nats_in_cursum(sum, -i); 1857 1784 mutex_unlock(&curseg->curseg_mutex); 1858 - return true; 1859 1785 } 1860 1786 1861 1787 /* ··· 1864 1794 struct f2fs_nm_info *nm_i = NM_I(sbi); 1865 1795 struct curseg_info *curseg = CURSEG_I(sbi, 
CURSEG_HOT_DATA); 1866 1796 struct f2fs_summary_block *sum = curseg->sum_blk; 1867 - struct nat_entry *ne, *cur; 1868 - struct page *page = NULL; 1869 - struct f2fs_nat_block *nat_blk = NULL; 1870 - nid_t start_nid = 0, end_nid = 0; 1871 - bool flushed; 1797 + struct nat_entry_set *nes, *tmp; 1798 + struct list_head *head = &nm_i->nat_entry_set; 1799 + bool to_journal = true; 1872 1800 1873 - flushed = flush_nats_in_journal(sbi); 1801 + /* merge nat entries of dirty list to nat entry set temporarily */ 1802 + merge_nats_in_set(sbi); 1874 1803 1875 - if (!flushed) 1876 - mutex_lock(&curseg->curseg_mutex); 1804 + /* 1805 + * if there is not enough space in journal to store dirty nat 1806 + * entries, remove all entries from journal and merge them 1807 + * into nat entry set. 1808 + */ 1809 + if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { 1810 + remove_nats_in_journal(sbi); 1877 1811 1878 - /* 1) flush dirty nat caches */ 1879 - list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) { 1880 - nid_t nid; 1881 - struct f2fs_nat_entry raw_ne; 1882 - int offset = -1; 1812 + /* 1813 + * merge nat entries of dirty list to nat entry set temporarily 1814 + */ 1815 + merge_nats_in_set(sbi); 1816 + } 1883 1817 1884 - if (nat_get_blkaddr(ne) == NEW_ADDR) 1885 - continue; 1818 + if (!nm_i->dirty_nat_cnt) 1819 + return; 1886 1820 1887 - nid = nat_get_nid(ne); 1821 + /* 1822 + * there are two steps to flush nat entries: 1823 + * #1, flush nat entries to journal in current hot data summary block. 1824 + * #2, flush nat entries to nat page. 
1825 + */ 1826 + list_for_each_entry_safe(nes, tmp, head, set_list) { 1827 + struct f2fs_nat_block *nat_blk; 1828 + struct nat_entry *ne, *cur; 1829 + struct page *page; 1830 + nid_t start_nid = nes->start_nid; 1888 1831 1889 - if (flushed) 1890 - goto to_nat_page; 1832 + if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) 1833 + to_journal = false; 1891 1834 1892 - /* if there is room for nat enries in curseg->sumpage */ 1893 - offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1894 - if (offset >= 0) { 1895 - raw_ne = nat_in_journal(sum, offset); 1896 - goto flush_now; 1897 - } 1898 - to_nat_page: 1899 - if (!page || (start_nid > nid || nid > end_nid)) { 1900 - if (page) { 1901 - f2fs_put_page(page, 1); 1902 - page = NULL; 1903 - } 1904 - start_nid = START_NID(nid); 1905 - end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; 1906 - 1907 - /* 1908 - * get nat block with dirty flag, increased reference 1909 - * count, mapped and lock 1910 - */ 1835 + if (to_journal) { 1836 + mutex_lock(&curseg->curseg_mutex); 1837 + } else { 1911 1838 page = get_next_nat_page(sbi, start_nid); 1912 1839 nat_blk = page_address(page); 1840 + f2fs_bug_on(!nat_blk); 1913 1841 } 1914 1842 1915 - f2fs_bug_on(!nat_blk); 1916 - raw_ne = nat_blk->entries[nid - start_nid]; 1917 - flush_now: 1918 - raw_nat_from_node_info(&raw_ne, &ne->ni); 1843 + /* flush dirty nats in nat entry set */ 1844 + list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { 1845 + struct f2fs_nat_entry *raw_ne; 1846 + nid_t nid = nat_get_nid(ne); 1847 + int offset; 1919 1848 1920 - if (offset < 0) { 1921 - nat_blk->entries[nid - start_nid] = raw_ne; 1922 - } else { 1923 - nat_in_journal(sum, offset) = raw_ne; 1924 - nid_in_journal(sum, offset) = cpu_to_le32(nid); 1925 - } 1849 + if (to_journal) { 1850 + offset = lookup_journal_in_cursum(sum, 1851 + NAT_JOURNAL, nid, 1); 1852 + f2fs_bug_on(offset < 0); 1853 + raw_ne = &nat_in_journal(sum, offset); 1854 + nid_in_journal(sum, offset) = cpu_to_le32(nid); 
1855 + } else { 1856 + raw_ne = &nat_blk->entries[nid - start_nid]; 1857 + } 1858 + raw_nat_from_node_info(raw_ne, &ne->ni); 1926 1859 1927 - if (nat_get_blkaddr(ne) == NULL_ADDR && 1860 + if (nat_get_blkaddr(ne) == NULL_ADDR && 1928 1861 add_free_nid(sbi, nid, false) <= 0) { 1929 - write_lock(&nm_i->nat_tree_lock); 1930 - __del_from_nat_cache(nm_i, ne); 1931 - write_unlock(&nm_i->nat_tree_lock); 1932 - } else { 1933 - write_lock(&nm_i->nat_tree_lock); 1934 - __clear_nat_cache_dirty(nm_i, ne); 1935 - write_unlock(&nm_i->nat_tree_lock); 1862 + write_lock(&nm_i->nat_tree_lock); 1863 + __del_from_nat_cache(nm_i, ne); 1864 + write_unlock(&nm_i->nat_tree_lock); 1865 + } else { 1866 + write_lock(&nm_i->nat_tree_lock); 1867 + __clear_nat_cache_dirty(nm_i, ne); 1868 + write_unlock(&nm_i->nat_tree_lock); 1869 + } 1936 1870 } 1871 + 1872 + if (to_journal) 1873 + mutex_unlock(&curseg->curseg_mutex); 1874 + else 1875 + f2fs_put_page(page, 1); 1876 + 1877 + release_nat_entry_set(nes, nm_i); 1937 1878 } 1938 - if (!flushed) 1939 - mutex_unlock(&curseg->curseg_mutex); 1940 - f2fs_put_page(page, 1); 1879 + 1880 + f2fs_bug_on(!list_empty(head)); 1881 + f2fs_bug_on(nm_i->dirty_nat_cnt); 1941 1882 } 1942 1883 1943 1884 static int init_node_manager(struct f2fs_sb_info *sbi) ··· 1977 1896 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1978 1897 INIT_LIST_HEAD(&nm_i->nat_entries); 1979 1898 INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1899 + INIT_LIST_HEAD(&nm_i->nat_entry_set); 1980 1900 1981 1901 mutex_init(&nm_i->build_lock); 1982 1902 spin_lock_init(&nm_i->free_nid_list_lock); ··· 2058 1976 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2059 1977 sizeof(struct nat_entry)); 2060 1978 if (!nat_entry_slab) 2061 - return -ENOMEM; 1979 + goto fail; 2062 1980 2063 1981 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2064 1982 sizeof(struct free_nid)); 2065 - if (!free_nid_slab) { 2066 - kmem_cache_destroy(nat_entry_slab); 2067 - return -ENOMEM; 2068 - } 1983 + if (!free_nid_slab) 
1984 + goto destory_nat_entry; 1985 + 1986 + nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", 1987 + sizeof(struct nat_entry_set)); 1988 + if (!nat_entry_set_slab) 1989 + goto destory_free_nid; 2069 1990 return 0; 1991 + 1992 + destory_free_nid: 1993 + kmem_cache_destroy(free_nid_slab); 1994 + destory_nat_entry: 1995 + kmem_cache_destroy(nat_entry_slab); 1996 + fail: 1997 + return -ENOMEM; 2070 1998 } 2071 1999 2072 2000 void destroy_node_manager_caches(void) 2073 2001 { 2002 + kmem_cache_destroy(nat_entry_set_slab); 2074 2003 kmem_cache_destroy(free_nid_slab); 2075 2004 kmem_cache_destroy(nat_entry_slab); 2076 2005 }

+7

fs/f2fs/node.h

··· 89 89 DIRTY_DENTS /* indicates dirty dentry pages */ 90 90 }; 91 91 92 + struct nat_entry_set { 93 + struct list_head set_list; /* link with all nat sets */ 94 + struct list_head entry_list; /* link with dirty nat entries */ 95 + nid_t start_nid; /* start nid of nats in set */ 96 + unsigned int entry_cnt; /* the # of nat entries in set */ 97 + }; 98 + 92 99 /* 93 100 * For free nid management 94 101 */

+21 -1

fs/f2fs/recovery.c

··· 300 300 struct node_info ni; 301 301 int err = 0, recovered = 0; 302 302 303 + recover_inline_xattr(inode, page); 304 + 303 305 if (recover_inline_data(inode, page)) 304 306 goto out; 305 307 ··· 436 434 437 435 int recover_fsync_data(struct f2fs_sb_info *sbi) 438 436 { 437 + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 439 438 struct list_head inode_list; 439 + block_t blkaddr; 440 440 int err; 441 441 bool need_writecp = false; 442 442 ··· 451 447 452 448 /* step #1: find fsynced inode numbers */ 453 449 sbi->por_doing = true; 450 + 451 + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 452 + 454 453 err = find_fsync_dnodes(sbi, &inode_list); 455 454 if (err) 456 455 goto out; ··· 469 462 out: 470 463 destroy_fsync_dnodes(&inode_list); 471 464 kmem_cache_destroy(fsync_entry_slab); 465 + 466 + if (err) { 467 + truncate_inode_pages_final(NODE_MAPPING(sbi)); 468 + truncate_inode_pages_final(META_MAPPING(sbi)); 469 + } 470 + 472 471 sbi->por_doing = false; 473 - if (!err && need_writecp) 472 + if (err) { 473 + discard_next_dnode(sbi, blkaddr); 474 + 475 + /* Flush all the NAT/SIT pages */ 476 + while (get_pages(sbi, F2FS_DIRTY_META)) 477 + sync_meta_pages(sbi, META, LONG_MAX); 478 + } else if (need_writecp) { 474 479 write_checkpoint(sbi, false); 480 + } 475 481 return err; 476 482 }

+16 -22

fs/f2fs/segment.c

··· 239 239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 240 240 struct flush_cmd cmd; 241 241 242 + trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), 243 + test_opt(sbi, FLUSH_MERGE)); 244 + 245 + if (test_opt(sbi, NOBARRIER)) 246 + return 0; 247 + 242 248 if (!test_opt(sbi, FLUSH_MERGE)) 243 249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 244 250 ··· 278 272 return -ENOMEM; 279 273 spin_lock_init(&fcc->issue_lock); 280 274 init_waitqueue_head(&fcc->flush_wait_queue); 281 - sbi->sm_info->cmd_control_info = fcc; 275 + SM_I(sbi)->cmd_control_info = fcc; 282 276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 283 277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 284 278 if (IS_ERR(fcc->f2fs_issue_flush)) { 285 279 err = PTR_ERR(fcc->f2fs_issue_flush); 286 280 kfree(fcc); 287 - sbi->sm_info->cmd_control_info = NULL; 281 + SM_I(sbi)->cmd_control_info = NULL; 288 282 return err; 289 283 } 290 284 ··· 293 287 294 288 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) 295 289 { 296 - struct flush_cmd_control *fcc = 297 - sbi->sm_info->cmd_control_info; 290 + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 298 291 299 292 if (fcc && fcc->f2fs_issue_flush) 300 293 kthread_stop(fcc->f2fs_issue_flush); 301 294 kfree(fcc); 302 - sbi->sm_info->cmd_control_info = NULL; 295 + SM_I(sbi)->cmd_control_info = NULL; 303 296 } 304 297 305 298 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, ··· 382 377 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 383 378 } 384 379 385 - void discard_next_dnode(struct f2fs_sb_info *sbi) 380 + void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) 386 381 { 387 - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 388 - block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 389 - 390 382 if (f2fs_issue_discard(sbi, blkaddr, 1)) { 391 383 struct page *page = grab_meta_page(sbi, blkaddr); 392 384 /* zero-filled 
page */ ··· 439 437 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 440 438 { 441 439 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 442 - unsigned int segno = -1; 440 + unsigned int segno; 443 441 unsigned int total_segs = TOTAL_SEGS(sbi); 444 442 445 443 mutex_lock(&dirty_i->seglist_lock); 446 - while (1) { 447 - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, 448 - segno + 1); 449 - if (segno >= total_segs) 450 - break; 444 + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) 451 445 __set_test_and_free(sbi, segno); 452 - } 453 446 mutex_unlock(&dirty_i->seglist_lock); 454 447 } 455 448 ··· 971 974 { 972 975 struct sit_info *sit_i = SIT_I(sbi); 973 976 struct curseg_info *curseg; 974 - unsigned int old_cursegno; 975 977 976 978 curseg = CURSEG_I(sbi, type); 977 979 978 980 mutex_lock(&curseg->curseg_mutex); 979 981 980 982 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 981 - old_cursegno = curseg->segno; 982 983 983 984 /* 984 985 * __add_sum_entry should be resided under the curseg_mutex ··· 997 1002 * since SSR needs latest valid block information. 
998 1003 */ 999 1004 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 1000 - locate_dirty_segment(sbi, old_cursegno); 1001 1005 1002 1006 mutex_unlock(&sit_i->sentry_lock); 1003 1007 ··· 1526 1532 struct page *page = NULL; 1527 1533 struct f2fs_sit_block *raw_sit = NULL; 1528 1534 unsigned int start = 0, end = 0; 1529 - unsigned int segno = -1; 1535 + unsigned int segno; 1530 1536 bool flushed; 1531 1537 1532 1538 mutex_lock(&curseg->curseg_mutex); ··· 1538 1544 */ 1539 1545 flushed = flush_sits_in_journal(sbi); 1540 1546 1541 - while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { 1547 + for_each_set_bit(segno, bitmap, nsegs) { 1542 1548 struct seg_entry *se = get_seg_entry(sbi, segno); 1543 1549 int sit_offset, offset; 1544 1550 ··· 1697 1703 struct curseg_info *array; 1698 1704 int i; 1699 1705 1700 - array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); 1706 + array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); 1701 1707 if (!array) 1702 1708 return -ENOMEM; 1703 1709

+6 -2

fs/f2fs/segment.h

··· 347 347 if (test_and_clear_bit(segno, free_i->free_segmap)) { 348 348 free_i->free_segments++; 349 349 350 - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), 351 - start_segno); 350 + next = find_next_bit(free_i->free_segmap, 351 + start_segno + sbi->segs_per_sec, start_segno); 352 352 if (next >= start_segno + sbi->segs_per_sec) { 353 353 if (test_and_clear_bit(secno, free_i->free_secmap)) 354 354 free_i->free_sections++; ··· 485 485 /* IPU can be done only for the user data */ 486 486 if (S_ISDIR(inode->i_mode)) 487 487 return false; 488 + 489 + /* this is only set during fdatasync */ 490 + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) 491 + return true; 488 492 489 493 switch (SM_I(sbi)->ipu_policy) { 490 494 case F2FS_IPU_FORCE:

+14 -7

fs/f2fs/super.c

··· 52 52 Opt_inline_xattr, 53 53 Opt_inline_data, 54 54 Opt_flush_merge, 55 + Opt_nobarrier, 55 56 Opt_err, 56 57 }; 57 58 ··· 70 69 {Opt_inline_xattr, "inline_xattr"}, 71 70 {Opt_inline_data, "inline_data"}, 72 71 {Opt_flush_merge, "flush_merge"}, 72 + {Opt_nobarrier, "nobarrier"}, 73 73 {Opt_err, NULL}, 74 74 }; 75 75 ··· 341 339 case Opt_flush_merge: 342 340 set_opt(sbi, FLUSH_MERGE); 343 341 break; 342 + case Opt_nobarrier: 343 + set_opt(sbi, NOBARRIER); 344 + break; 344 345 default: 345 346 f2fs_msg(sb, KERN_ERR, 346 347 "Unrecognized mount option \"%s\" or missing value", ··· 549 544 seq_puts(seq, ",inline_data"); 550 545 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 551 546 seq_puts(seq, ",flush_merge"); 547 + if (test_opt(sbi, NOBARRIER)) 548 + seq_puts(seq, ",nobarrier"); 552 549 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 553 550 554 551 return 0; ··· 622 615 * Previous and new state of filesystem is RO, 623 616 * so skip checking GC and FLUSH_MERGE conditions. 
624 617 */ 625 - if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 618 + if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) 626 619 goto skip; 627 620 628 621 /* ··· 649 642 */ 650 643 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { 651 644 destroy_flush_cmd_control(sbi); 652 - } else if (test_opt(sbi, FLUSH_MERGE) && 653 - !sbi->sm_info->cmd_control_info) { 645 + } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { 654 646 err = create_flush_cmd_control(sbi); 655 647 if (err) 656 648 goto restore_gc; ··· 953 947 mutex_init(&sbi->gc_mutex); 954 948 mutex_init(&sbi->writepages); 955 949 mutex_init(&sbi->cp_mutex); 956 - mutex_init(&sbi->node_write); 950 + init_rwsem(&sbi->node_write); 957 951 sbi->por_doing = false; 958 952 spin_lock_init(&sbi->stat_lock); 959 953 ··· 1003 997 INIT_LIST_HEAD(&sbi->dir_inode_list); 1004 998 spin_lock_init(&sbi->dir_inode_lock); 1005 999 1006 - init_orphan_info(sbi); 1000 + init_ino_entry_info(sbi); 1007 1001 1008 1002 /* setup f2fs internal modules */ 1009 1003 err = build_segment_manager(sbi); ··· 1040 1034 goto free_node_inode; 1041 1035 } 1042 1036 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1037 + iput(root); 1043 1038 err = -EINVAL; 1044 - goto free_root_inode; 1039 + goto free_node_inode; 1045 1040 } 1046 1041 1047 1042 sb->s_root = d_make_root(root); /* allocate root dentry */ ··· 1089 1082 * If filesystem is not mounted as read-only then 1090 1083 * do start the gc_thread. 1091 1084 */ 1092 - if (!(sb->s_flags & MS_RDONLY)) { 1085 + if (!f2fs_readonly(sb)) { 1093 1086 /* After POR, we can run background GC thread.*/ 1094 1087 err = start_gc_thread(sbi); 1095 1088 if (err)

+87

include/trace/events/f2fs.h

··· 587 587 __entry->ret) 588 588 ); 589 589 590 + TRACE_EVENT(f2fs_direct_IO_enter, 591 + 592 + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), 593 + 594 + TP_ARGS(inode, offset, len, rw), 595 + 596 + TP_STRUCT__entry( 597 + __field(dev_t, dev) 598 + __field(ino_t, ino) 599 + __field(loff_t, pos) 600 + __field(unsigned long, len) 601 + __field(int, rw) 602 + ), 603 + 604 + TP_fast_assign( 605 + __entry->dev = inode->i_sb->s_dev; 606 + __entry->ino = inode->i_ino; 607 + __entry->pos = offset; 608 + __entry->len = len; 609 + __entry->rw = rw; 610 + ), 611 + 612 + TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d", 613 + show_dev_ino(__entry), 614 + __entry->pos, 615 + __entry->len, 616 + __entry->rw) 617 + ); 618 + 619 + TRACE_EVENT(f2fs_direct_IO_exit, 620 + 621 + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, 622 + int rw, int ret), 623 + 624 + TP_ARGS(inode, offset, len, rw, ret), 625 + 626 + TP_STRUCT__entry( 627 + __field(dev_t, dev) 628 + __field(ino_t, ino) 629 + __field(loff_t, pos) 630 + __field(unsigned long, len) 631 + __field(int, rw) 632 + __field(int, ret) 633 + ), 634 + 635 + TP_fast_assign( 636 + __entry->dev = inode->i_sb->s_dev; 637 + __entry->ino = inode->i_ino; 638 + __entry->pos = offset; 639 + __entry->len = len; 640 + __entry->rw = rw; 641 + __entry->ret = ret; 642 + ), 643 + 644 + TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu " 645 + "rw = %d ret = %d", 646 + show_dev_ino(__entry), 647 + __entry->pos, 648 + __entry->len, 649 + __entry->rw, 650 + __entry->ret) 651 + ); 652 + 590 653 TRACE_EVENT(f2fs_reserve_new_block, 591 654 592 655 TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), ··· 988 925 show_dev(__entry), 989 926 (unsigned long long)__entry->blkstart, 990 927 (unsigned long long)__entry->blklen) 928 + ); 929 + 930 + TRACE_EVENT(f2fs_issue_flush, 931 + 932 + TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge), 933 + 934 + TP_ARGS(sb, 
nobarrier, flush_merge), 935 + 936 + TP_STRUCT__entry( 937 + __field(dev_t, dev) 938 + __field(bool, nobarrier) 939 + __field(bool, flush_merge) 940 + ), 941 + 942 + TP_fast_assign( 943 + __entry->dev = sb->s_dev; 944 + __entry->nobarrier = nobarrier; 945 + __entry->flush_merge = flush_merge; 946 + ), 947 + 948 + TP_printk("dev = (%d,%d), %s %s", 949 + show_dev(__entry), 950 + __entry->nobarrier ? "skip (nobarrier)" : "issue", 951 + __entry->flush_merge ? " with flush_merge" : "") 991 952 ); 992 953 #endif /* _TRACE_F2FS_H */ 993 954