Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-f2fs-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
"In this round, as Ted pointed out, fscrypto allows one more key prefix
given by the filesystem to resolve backward compatibility issues. Other
than that, we've fixed several error handling cases by introducing
a fault injection facility. We've also achieved performance
improvements in some workloads, along with a bunch of bug fixes.

Summary:

Enhancements:
- fs-specific prefix for fscrypto
- fault injection facility
- expose validity bitmaps so that users can be aware of fragmentation
- fallocate/rm/preallocation speed up
- use percpu counters

Bug fixes:
- some inline_dentry/inline_data bugs
- error handling for atomic/volatile/orphan inodes
- recover broken superblock"

* tag 'for-f2fs-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (73 commits)
f2fs: fix to update dirty page count correctly
f2fs: flush pending bios right away when error occurs
f2fs: avoid ENOSPC fault in the recovery process
f2fs: make exit_f2fs_fs more clear
f2fs: use percpu_counter for total_valid_inode_count
f2fs: use percpu_counter for alloc_valid_block_count
f2fs: use percpu_counter for # of dirty pages in inode
f2fs: use percpu_counter for page counters
f2fs: use bio count instead of F2FS_WRITEBACK page count
f2fs: manipulate dirty file inodes when DATA_FLUSH is set
f2fs: add fault injection to sysfs
f2fs: no need inc dirty pages under inode lock
f2fs: fix incorrect error path handling in f2fs_move_rehashed_dirents
f2fs: fix i_current_depth during inline dentry conversion
f2fs: correct return value type of f2fs_fill_super
f2fs: fix deadlock when flush inline data
f2fs: avoid f2fs_bug_on during recovery
f2fs: show # of orphan inodes
f2fs: support in batch fzero in dnode page
f2fs: support in batch multi blocks preallocation
...

+1451 -617
+1 -1
MAINTAINERS
··· 4793 4793 F2FS FILE SYSTEM 4794 4794 M: Jaegeuk Kim <jaegeuk@kernel.org> 4795 4795 M: Changman Lee <cm224.lee@samsung.com> 4796 - R: Chao Yu <chao2.yu@samsung.com> 4796 + R: Chao Yu <yuchao0@huawei.com> 4797 4797 L: linux-f2fs-devel@lists.sourceforge.net 4798 4798 W: http://en.wikipedia.org/wiki/F2FS 4799 4799 T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
+76 -44
fs/crypto/keyinfo.c
··· 78 78 return res; 79 79 } 80 80 81 + static int validate_user_key(struct fscrypt_info *crypt_info, 82 + struct fscrypt_context *ctx, u8 *raw_key, 83 + u8 *prefix, int prefix_size) 84 + { 85 + u8 *full_key_descriptor; 86 + struct key *keyring_key; 87 + struct fscrypt_key *master_key; 88 + const struct user_key_payload *ukp; 89 + int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1; 90 + int res; 91 + 92 + full_key_descriptor = kmalloc(full_key_len, GFP_NOFS); 93 + if (!full_key_descriptor) 94 + return -ENOMEM; 95 + 96 + memcpy(full_key_descriptor, prefix, prefix_size); 97 + sprintf(full_key_descriptor + prefix_size, 98 + "%*phN", FS_KEY_DESCRIPTOR_SIZE, 99 + ctx->master_key_descriptor); 100 + full_key_descriptor[full_key_len - 1] = '\0'; 101 + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); 102 + kfree(full_key_descriptor); 103 + if (IS_ERR(keyring_key)) 104 + return PTR_ERR(keyring_key); 105 + 106 + if (keyring_key->type != &key_type_logon) { 107 + printk_once(KERN_WARNING 108 + "%s: key type must be logon\n", __func__); 109 + res = -ENOKEY; 110 + goto out; 111 + } 112 + down_read(&keyring_key->sem); 113 + ukp = user_key_payload(keyring_key); 114 + if (ukp->datalen != sizeof(struct fscrypt_key)) { 115 + res = -EINVAL; 116 + up_read(&keyring_key->sem); 117 + goto out; 118 + } 119 + master_key = (struct fscrypt_key *)ukp->data; 120 + BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); 121 + 122 + if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { 123 + printk_once(KERN_WARNING 124 + "%s: key size incorrect: %d\n", 125 + __func__, master_key->size); 126 + res = -ENOKEY; 127 + up_read(&keyring_key->sem); 128 + goto out; 129 + } 130 + res = derive_key_aes(ctx->nonce, master_key->raw, raw_key); 131 + up_read(&keyring_key->sem); 132 + if (res) 133 + goto out; 134 + 135 + crypt_info->ci_keyring_key = keyring_key; 136 + return 0; 137 + out: 138 + key_put(keyring_key); 139 + return res; 140 + } 141 + 81 142 static 
void put_crypt_info(struct fscrypt_info *ci) 82 143 { 83 144 if (!ci) ··· 152 91 int get_crypt_info(struct inode *inode) 153 92 { 154 93 struct fscrypt_info *crypt_info; 155 - u8 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + 156 - (FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; 157 - struct key *keyring_key = NULL; 158 - struct fscrypt_key *master_key; 159 94 struct fscrypt_context ctx; 160 - const struct user_key_payload *ukp; 161 95 struct crypto_skcipher *ctfm; 162 96 const char *cipher_str; 163 97 u8 raw_key[FS_MAX_KEY_SIZE]; ··· 223 167 memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); 224 168 goto got_key; 225 169 } 226 - memcpy(full_key_descriptor, FS_KEY_DESC_PREFIX, 227 - FS_KEY_DESC_PREFIX_SIZE); 228 - sprintf(full_key_descriptor + FS_KEY_DESC_PREFIX_SIZE, 229 - "%*phN", FS_KEY_DESCRIPTOR_SIZE, 230 - ctx.master_key_descriptor); 231 - full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + 232 - (2 * FS_KEY_DESCRIPTOR_SIZE)] = '\0'; 233 - keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); 234 - if (IS_ERR(keyring_key)) { 235 - res = PTR_ERR(keyring_key); 236 - keyring_key = NULL; 237 - goto out; 238 - } 239 - crypt_info->ci_keyring_key = keyring_key; 240 - if (keyring_key->type != &key_type_logon) { 241 - printk_once(KERN_WARNING 242 - "%s: key type must be logon\n", __func__); 243 - res = -ENOKEY; 244 - goto out; 245 - } 246 - down_read(&keyring_key->sem); 247 - ukp = user_key_payload(keyring_key); 248 - if (ukp->datalen != sizeof(struct fscrypt_key)) { 249 - res = -EINVAL; 250 - up_read(&keyring_key->sem); 251 - goto out; 252 - } 253 - master_key = (struct fscrypt_key *)ukp->data; 254 - BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); 255 170 256 - if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { 257 - printk_once(KERN_WARNING 258 - "%s: key size incorrect: %d\n", 259 - __func__, master_key->size); 260 - res = -ENOKEY; 261 - up_read(&keyring_key->sem); 171 + res = validate_user_key(crypt_info, &ctx, raw_key, 172 + FS_KEY_DESC_PREFIX, 
FS_KEY_DESC_PREFIX_SIZE); 173 + if (res && inode->i_sb->s_cop->key_prefix) { 174 + u8 *prefix = NULL; 175 + int prefix_size, res2; 176 + 177 + prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix); 178 + res2 = validate_user_key(crypt_info, &ctx, raw_key, 179 + prefix, prefix_size); 180 + if (res2) { 181 + if (res2 == -ENOKEY) 182 + res = -ENOKEY; 183 + goto out; 184 + } 185 + } else if (res) { 262 186 goto out; 263 187 } 264 - res = derive_key_aes(ctx.nonce, master_key->raw, raw_key); 265 - up_read(&keyring_key->sem); 266 - if (res) 267 - goto out; 268 188 got_key: 269 189 ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); 270 190 if (!ctfm || IS_ERR(ctfm)) {
+8
fs/f2fs/Kconfig
··· 94 94 information and block IO patterns in the filesystem level. 95 95 96 96 If unsure, say N. 97 + 98 + config F2FS_FAULT_INJECTION 99 + bool "F2FS fault injection facility" 100 + depends on F2FS_FS 101 + help 102 + Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on. 103 + 104 + If unsure, say N.
+2 -2
fs/f2fs/acl.c
··· 115 115 struct f2fs_acl_entry *entry; 116 116 int i; 117 117 118 - f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * 118 + f2fs_acl = f2fs_kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * 119 119 sizeof(struct f2fs_acl_entry), GFP_NOFS); 120 120 if (!f2fs_acl) 121 121 return ERR_PTR(-ENOMEM); ··· 175 175 176 176 retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); 177 177 if (retval > 0) { 178 - value = kmalloc(retval, GFP_F2FS_ZERO); 178 + value = f2fs_kmalloc(retval, GFP_F2FS_ZERO); 179 179 if (!value) 180 180 return ERR_PTR(-ENOMEM); 181 181 retval = f2fs_getxattr(inode, name_index, "", value,
+36 -31
fs/f2fs/checkpoint.c
··· 26 26 static struct kmem_cache *ino_entry_slab; 27 27 struct kmem_cache *inode_entry_slab; 28 28 29 + void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) 30 + { 31 + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 32 + sbi->sb->s_flags |= MS_RDONLY; 33 + if (!end_io) 34 + f2fs_flush_merged_bios(sbi); 35 + } 36 + 29 37 /* 30 38 * We guarantee no failure on the returned page. 31 39 */ ··· 42 34 struct address_space *mapping = META_MAPPING(sbi); 43 35 struct page *page = NULL; 44 36 repeat: 45 - page = grab_cache_page(mapping, index); 37 + page = f2fs_grab_cache_page(mapping, index, false); 46 38 if (!page) { 47 39 cond_resched(); 48 40 goto repeat; ··· 72 64 if (unlikely(!is_meta)) 73 65 fio.rw &= ~REQ_META; 74 66 repeat: 75 - page = grab_cache_page(mapping, index); 67 + page = f2fs_grab_cache_page(mapping, index, false); 76 68 if (!page) { 77 69 cond_resched(); 78 70 goto repeat; ··· 99 91 * meta page. 100 92 */ 101 93 if (unlikely(!PageUptodate(page))) 102 - f2fs_stop_checkpoint(sbi); 94 + f2fs_stop_checkpoint(sbi, false); 103 95 out: 104 96 return page; 105 97 } ··· 194 186 BUG(); 195 187 } 196 188 197 - page = grab_cache_page(META_MAPPING(sbi), fio.new_blkaddr); 189 + page = f2fs_grab_cache_page(META_MAPPING(sbi), 190 + fio.new_blkaddr, false); 198 191 if (!page) 199 192 continue; 200 193 if (PageUptodate(page)) { ··· 220 211 bool readahead = false; 221 212 222 213 page = find_get_page(META_MAPPING(sbi), index); 223 - if (!page || (page && !PageUptodate(page))) 214 + if (!page || !PageUptodate(page)) 224 215 readahead = true; 225 216 f2fs_put_page(page, 0); 226 217 ··· 457 448 return e ? true : false; 458 449 } 459 450 460 - void release_ino_entry(struct f2fs_sb_info *sbi) 451 + void release_ino_entry(struct f2fs_sb_info *sbi, bool all) 461 452 { 462 453 struct ino_entry *e, *tmp; 463 454 int i; 464 455 465 - for (i = APPEND_INO; i <= UPDATE_INO; i++) { 456 + for (i = all ? 
ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { 466 457 struct inode_management *im = &sbi->im[i]; 467 458 468 459 spin_lock(&im->ino_lock); ··· 482 473 int err = 0; 483 474 484 475 spin_lock(&im->ino_lock); 476 + 477 + #ifdef CONFIG_F2FS_FAULT_INJECTION 478 + if (time_to_inject(FAULT_ORPHAN)) { 479 + spin_unlock(&im->ino_lock); 480 + return -ENOSPC; 481 + } 482 + #endif 485 483 if (unlikely(im->ino_num >= sbi->max_orphans)) 486 484 err = -ENOSPC; 487 485 else ··· 793 777 !S_ISLNK(inode->i_mode)) 794 778 return; 795 779 796 - spin_lock(&sbi->inode_lock[type]); 797 - __add_dirty_inode(inode, type); 798 - inode_inc_dirty_pages(inode); 799 - spin_unlock(&sbi->inode_lock[type]); 780 + if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) { 781 + spin_lock(&sbi->inode_lock[type]); 782 + __add_dirty_inode(inode, type); 783 + spin_unlock(&sbi->inode_lock[type]); 784 + } 800 785 786 + inode_inc_dirty_pages(inode); 801 787 SetPagePrivate(page); 802 788 f2fs_trace_pid(page); 803 - } 804 - 805 - void add_dirty_dir_inode(struct inode *inode) 806 - { 807 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 808 - 809 - spin_lock(&sbi->inode_lock[DIR_INODE]); 810 - __add_dirty_inode(inode, DIR_INODE); 811 - spin_unlock(&sbi->inode_lock[DIR_INODE]); 812 789 } 813 790 814 791 void remove_dirty_inode(struct inode *inode) 815 792 { 816 793 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 817 - struct f2fs_inode_info *fi = F2FS_I(inode); 818 794 enum inode_type type = S_ISDIR(inode->i_mode) ? 
DIR_INODE : FILE_INODE; 819 795 820 796 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 821 797 !S_ISLNK(inode->i_mode)) 822 798 return; 823 799 800 + if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH)) 801 + return; 802 + 824 803 spin_lock(&sbi->inode_lock[type]); 825 804 __remove_dirty_inode(inode, type); 826 805 spin_unlock(&sbi->inode_lock[type]); 827 - 828 - /* Only from the recovery routine */ 829 - if (is_inode_flag_set(fi, FI_DELAY_IPUT)) { 830 - clear_inode_flag(fi, FI_DELAY_IPUT); 831 - iput(inode); 832 - } 833 806 } 834 807 835 808 int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) ··· 897 892 898 893 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 899 894 up_write(&sbi->node_write); 900 - err = sync_node_pages(sbi, 0, &wbc); 895 + err = sync_node_pages(sbi, &wbc); 901 896 if (err) { 902 897 f2fs_unlock_all(sbi); 903 898 goto out; ··· 922 917 for (;;) { 923 918 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); 924 919 925 - if (!get_pages(sbi, F2FS_WRITEBACK)) 920 + if (!atomic_read(&sbi->nr_wb_bios)) 926 921 break; 927 922 928 923 io_schedule_timeout(5*HZ); ··· 1087 1082 1088 1083 /* update user_block_counts */ 1089 1084 sbi->last_valid_block_count = sbi->total_valid_block_count; 1090 - sbi->alloc_valid_block_count = 0; 1085 + percpu_counter_set(&sbi->alloc_valid_block_count, 0); 1091 1086 1092 1087 /* Here, we only have one bio having CP pack */ 1093 1088 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); ··· 1103 1098 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, 1104 1099 discard_blk); 1105 1100 1106 - release_ino_entry(sbi); 1101 + release_ino_entry(sbi, false); 1107 1102 1108 1103 if (unlikely(f2fs_cp_error(sbi))) 1109 1104 return -EIO;
+137 -60
fs/f2fs/data.c
··· 68 68 69 69 if (unlikely(bio->bi_error)) { 70 70 set_bit(AS_EIO, &page->mapping->flags); 71 - f2fs_stop_checkpoint(sbi); 71 + f2fs_stop_checkpoint(sbi, true); 72 72 } 73 73 end_page_writeback(page); 74 - dec_page_count(sbi, F2FS_WRITEBACK); 75 74 } 76 - 77 - if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait)) 75 + if (atomic_dec_and_test(&sbi->nr_wb_bios) && 76 + wq_has_sleeper(&sbi->cp_wait)) 78 77 wake_up(&sbi->cp_wait); 79 78 80 79 bio_put(bio); ··· 97 98 return bio; 98 99 } 99 100 101 + static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw, 102 + struct bio *bio) 103 + { 104 + if (!is_read_io(rw)) 105 + atomic_inc(&sbi->nr_wb_bios); 106 + submit_bio(rw, bio); 107 + } 108 + 100 109 static void __submit_merged_bio(struct f2fs_bio_info *io) 101 110 { 102 111 struct f2fs_io_info *fio = &io->fio; ··· 117 110 else 118 111 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); 119 112 120 - submit_bio(fio->rw, io->bio); 113 + __submit_bio(io->sbi, fio->rw, io->bio); 121 114 io->bio = NULL; 122 115 } 123 116 ··· 235 228 return -EFAULT; 236 229 } 237 230 238 - submit_bio(fio->rw, bio); 231 + __submit_bio(fio->sbi, fio->rw, bio); 239 232 return 0; 240 233 } 241 234 ··· 254 247 verify_block_addr(sbi, fio->new_blkaddr); 255 248 256 249 down_write(&io->io_rwsem); 257 - 258 - if (!is_read) 259 - inc_page_count(sbi, F2FS_WRITEBACK); 260 250 261 251 if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || 262 252 io->fio.rw != fio->rw)) ··· 282 278 trace_f2fs_submit_page_mbio(fio->page, fio); 283 279 } 284 280 281 + static void __set_data_blkaddr(struct dnode_of_data *dn) 282 + { 283 + struct f2fs_node *rn = F2FS_NODE(dn->node_page); 284 + __le32 *addr_array; 285 + 286 + /* Get physical address of data block */ 287 + addr_array = blkaddr_in_node(rn); 288 + addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 289 + } 290 + 285 291 /* 286 292 * Lock ordering for the change of data block address: 287 293 * ->data_page ··· 300 286 
*/ 301 287 void set_data_blkaddr(struct dnode_of_data *dn) 302 288 { 303 - struct f2fs_node *rn; 304 - __le32 *addr_array; 305 - struct page *node_page = dn->node_page; 306 - unsigned int ofs_in_node = dn->ofs_in_node; 307 - 308 - f2fs_wait_on_page_writeback(node_page, NODE, true); 309 - 310 - rn = F2FS_NODE(node_page); 311 - 312 - /* Get physical address of data block */ 313 - addr_array = blkaddr_in_node(rn); 314 - addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 315 - if (set_page_dirty(node_page)) 289 + f2fs_wait_on_page_writeback(dn->node_page, NODE, true); 290 + __set_data_blkaddr(dn); 291 + if (set_page_dirty(dn->node_page)) 316 292 dn->node_changed = true; 317 293 } 318 294 ··· 313 309 f2fs_update_extent_cache(dn); 314 310 } 315 311 316 - int reserve_new_block(struct dnode_of_data *dn) 312 + /* dn->ofs_in_node will be returned with up-to-date last block pointer */ 313 + int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) 317 314 { 318 315 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 319 316 317 + if (!count) 318 + return 0; 319 + 320 320 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 321 321 return -EPERM; 322 - if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 322 + if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) 323 323 return -ENOSPC; 324 324 325 - trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 325 + trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, 326 + dn->ofs_in_node, count); 326 327 327 - dn->data_blkaddr = NEW_ADDR; 328 - set_data_blkaddr(dn); 328 + f2fs_wait_on_page_writeback(dn->node_page, NODE, true); 329 + 330 + for (; count > 0; dn->ofs_in_node++) { 331 + block_t blkaddr = 332 + datablock_addr(dn->node_page, dn->ofs_in_node); 333 + if (blkaddr == NULL_ADDR) { 334 + dn->data_blkaddr = NEW_ADDR; 335 + __set_data_blkaddr(dn); 336 + count--; 337 + } 338 + } 339 + 340 + if (set_page_dirty(dn->node_page)) 341 + dn->node_changed = true; 342 + 329 343 
mark_inode_dirty(dn->inode); 330 344 sync_inode_page(dn); 331 345 return 0; 346 + } 347 + 348 + /* Should keep dn->ofs_in_node unchanged */ 349 + int reserve_new_block(struct dnode_of_data *dn) 350 + { 351 + unsigned int ofs_in_node = dn->ofs_in_node; 352 + int ret; 353 + 354 + ret = reserve_new_blocks(dn, 1); 355 + dn->ofs_in_node = ofs_in_node; 356 + return ret; 332 357 } 333 358 334 359 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) ··· 578 545 struct node_info ni; 579 546 int seg = CURSEG_WARM_DATA; 580 547 pgoff_t fofs; 548 + blkcnt_t count = 1; 581 549 582 550 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 583 551 return -EPERM; ··· 587 553 if (dn->data_blkaddr == NEW_ADDR) 588 554 goto alloc; 589 555 590 - if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 556 + if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) 591 557 return -ENOSPC; 592 558 593 559 alloc: ··· 616 582 struct f2fs_map_blocks map; 617 583 ssize_t ret = 0; 618 584 619 - map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos); 620 - map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from)); 585 + map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); 586 + map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from)); 621 587 map.m_next_pgofs = NULL; 622 588 623 589 if (f2fs_encrypted_inode(inode)) ··· 655 621 struct dnode_of_data dn; 656 622 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 657 623 int mode = create ? 
ALLOC_NODE : LOOKUP_NODE_RA; 658 - pgoff_t pgofs, end_offset; 624 + pgoff_t pgofs, end_offset, end; 659 625 int err = 0, ofs = 1; 626 + unsigned int ofs_in_node, last_ofs_in_node; 627 + blkcnt_t prealloc; 660 628 struct extent_info ei; 661 629 bool allocated = false; 662 630 block_t blkaddr; ··· 668 632 669 633 /* it only supports block size == page size */ 670 634 pgofs = (pgoff_t)map->m_lblk; 635 + end = pgofs + maxblocks; 671 636 672 637 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { 673 638 map->m_pblk = ei.blk + pgofs - ei.fofs; ··· 685 648 set_new_dnode(&dn, inode, NULL, NULL, 0); 686 649 err = get_dnode_of_data(&dn, pgofs, mode); 687 650 if (err) { 651 + if (flag == F2FS_GET_BLOCK_BMAP) 652 + map->m_pblk = 0; 688 653 if (err == -ENOENT) { 689 654 err = 0; 690 655 if (map->m_next_pgofs) ··· 696 657 goto unlock_out; 697 658 } 698 659 660 + prealloc = 0; 661 + ofs_in_node = dn.ofs_in_node; 699 662 end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 700 663 701 664 next_block: ··· 710 669 goto sync_out; 711 670 } 712 671 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 713 - if (blkaddr == NULL_ADDR) 714 - err = reserve_new_block(&dn); 672 + if (blkaddr == NULL_ADDR) { 673 + prealloc++; 674 + last_ofs_in_node = dn.ofs_in_node; 675 + } 715 676 } else { 716 677 err = __allocate_data_block(&dn); 678 + if (!err) { 679 + set_inode_flag(F2FS_I(inode), 680 + FI_APPEND_WRITE); 681 + allocated = true; 682 + } 717 683 } 718 684 if (err) 719 685 goto sync_out; 720 - allocated = true; 721 686 map->m_flags = F2FS_MAP_NEW; 722 687 blkaddr = dn.data_blkaddr; 723 688 } else { 689 + if (flag == F2FS_GET_BLOCK_BMAP) { 690 + map->m_pblk = 0; 691 + goto sync_out; 692 + } 724 693 if (flag == F2FS_GET_BLOCK_FIEMAP && 725 694 blkaddr == NULL_ADDR) { 726 695 if (map->m_next_pgofs) 727 696 *map->m_next_pgofs = pgofs + 1; 728 697 } 729 698 if (flag != F2FS_GET_BLOCK_FIEMAP || 730 - blkaddr != NEW_ADDR) { 731 - if (flag == F2FS_GET_BLOCK_BMAP) 732 - err = -ENOENT; 699 + blkaddr != 
NEW_ADDR) 733 700 goto sync_out; 734 - } 735 701 } 736 702 } 703 + 704 + if (flag == F2FS_GET_BLOCK_PRE_AIO) 705 + goto skip; 737 706 738 707 if (map->m_len == 0) { 739 708 /* preallocated unwritten block should be mapped for fiemap. */ ··· 756 705 } else if ((map->m_pblk != NEW_ADDR && 757 706 blkaddr == (map->m_pblk + ofs)) || 758 707 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || 759 - flag == F2FS_GET_BLOCK_PRE_DIO || 760 - flag == F2FS_GET_BLOCK_PRE_AIO) { 708 + flag == F2FS_GET_BLOCK_PRE_DIO) { 761 709 ofs++; 762 710 map->m_len++; 763 711 } else { 764 712 goto sync_out; 765 713 } 766 714 715 + skip: 767 716 dn.ofs_in_node++; 768 717 pgofs++; 769 718 770 - if (map->m_len < maxblocks) { 771 - if (dn.ofs_in_node < end_offset) 772 - goto next_block; 719 + /* preallocate blocks in batch for one dnode page */ 720 + if (flag == F2FS_GET_BLOCK_PRE_AIO && 721 + (pgofs == end || dn.ofs_in_node == end_offset)) { 773 722 774 - if (allocated) 775 - sync_inode_page(&dn); 776 - f2fs_put_dnode(&dn); 723 + dn.ofs_in_node = ofs_in_node; 724 + err = reserve_new_blocks(&dn, prealloc); 725 + if (err) 726 + goto sync_out; 777 727 778 - if (create) { 779 - f2fs_unlock_op(sbi); 780 - f2fs_balance_fs(sbi, allocated); 728 + map->m_len += dn.ofs_in_node - ofs_in_node; 729 + if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { 730 + err = -ENOSPC; 731 + goto sync_out; 781 732 } 782 - allocated = false; 783 - goto next_dnode; 733 + dn.ofs_in_node = end_offset; 784 734 } 735 + 736 + if (pgofs >= end) 737 + goto sync_out; 738 + else if (dn.ofs_in_node < end_offset) 739 + goto next_block; 740 + 741 + if (allocated) 742 + sync_inode_page(&dn); 743 + f2fs_put_dnode(&dn); 744 + 745 + if (create) { 746 + f2fs_unlock_op(sbi); 747 + f2fs_balance_fs(sbi, allocated); 748 + } 749 + allocated = false; 750 + goto next_dnode; 785 751 786 752 sync_out: 787 753 if (allocated) ··· 1051 983 */ 1052 984 if (bio && (last_block_in_bio != block_nr - 1)) { 1053 985 submit_and_realloc: 1054 - 
submit_bio(READ, bio); 986 + __submit_bio(F2FS_I_SB(inode), READ, bio); 1055 987 bio = NULL; 1056 988 } 1057 989 if (bio == NULL) { ··· 1094 1026 goto next_page; 1095 1027 confused: 1096 1028 if (bio) { 1097 - submit_bio(READ, bio); 1029 + __submit_bio(F2FS_I_SB(inode), READ, bio); 1098 1030 bio = NULL; 1099 1031 } 1100 1032 unlock_page(page); ··· 1104 1036 } 1105 1037 BUG_ON(pages && !list_empty(pages)); 1106 1038 if (bio) 1107 - submit_bio(READ, bio); 1039 + __submit_bio(F2FS_I_SB(inode), READ, bio); 1108 1040 return 0; 1109 1041 } 1110 1042 ··· 1245 1177 goto redirty_out; 1246 1178 if (f2fs_is_drop_cache(inode)) 1247 1179 goto out; 1248 - if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && 1249 - available_free_memory(sbi, BASE_CHECK)) 1180 + /* we should not write 0'th page having journal header */ 1181 + if (f2fs_is_volatile_file(inode) && (!page->index || 1182 + (!wbc->for_reclaim && 1183 + available_free_memory(sbi, BASE_CHECK)))) 1250 1184 goto redirty_out; 1251 1185 1252 1186 /* Dentry blocks are controlled by checkpoint */ ··· 1550 1480 if (pos + len <= MAX_INLINE_DATA) { 1551 1481 read_inline_data(page, ipage); 1552 1482 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); 1553 - set_inline_node(ipage); 1483 + if (inode->i_nlink) 1484 + set_inline_node(ipage); 1554 1485 } else { 1555 1486 err = f2fs_convert_inline_page(&dn, page); 1556 1487 if (err) ··· 1567 1496 } else { 1568 1497 /* hole case */ 1569 1498 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1570 - if (err || (!err && dn.data_blkaddr == NULL_ADDR)) { 1499 + if (err || dn.data_blkaddr == NULL_ADDR) { 1571 1500 f2fs_put_dnode(&dn); 1572 1501 f2fs_lock_op(sbi); 1573 1502 locked = true; ··· 1754 1683 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1755 1684 1756 1685 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); 1757 - if (err < 0 && iov_iter_rw(iter) == WRITE) 1758 - f2fs_write_failed(mapping, offset + count); 1686 + if (iov_iter_rw(iter) == WRITE) { 1687 
+ if (err > 0) 1688 + set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1689 + else if (err < 0) 1690 + f2fs_write_failed(mapping, offset + count); 1691 + } 1759 1692 1760 1693 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); 1761 1694 ··· 1789 1714 if (IS_ATOMIC_WRITTEN_PAGE(page)) 1790 1715 return; 1791 1716 1717 + set_page_private(page, 0); 1792 1718 ClearPagePrivate(page); 1793 1719 } 1794 1720 ··· 1803 1727 if (IS_ATOMIC_WRITTEN_PAGE(page)) 1804 1728 return 0; 1805 1729 1730 + set_page_private(page, 0); 1806 1731 ClearPagePrivate(page); 1807 1732 return 1; 1808 1733 }
+15 -10
fs/f2fs/debug.c
··· 48 48 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; 49 49 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 50 50 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 51 - si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); 51 + si->wb_bios = atomic_read(&sbi->nr_wb_bios); 52 52 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 53 53 si->rsvd_segs = reserved_segments(sbi); 54 54 si->overp_segs = overprovision_segments(sbi); ··· 58 58 si->inline_xattr = atomic_read(&sbi->inline_xattr); 59 59 si->inline_inode = atomic_read(&sbi->inline_inode); 60 60 si->inline_dir = atomic_read(&sbi->inline_dir); 61 + si->orphans = sbi->im[ORPHAN_INO].ino_num; 61 62 si->utilization = utilization(sbi); 62 63 63 64 si->free_segs = free_segments(sbi); ··· 144 143 si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; 145 144 si->base_mem += 2 * sizeof(struct f2fs_inode_info); 146 145 si->base_mem += sizeof(*sbi->ckpt); 146 + si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; 147 147 148 148 /* build sm */ 149 149 si->base_mem += sizeof(struct f2fs_sm_info); ··· 194 192 si->cache_mem += NM_I(sbi)->dirty_nat_cnt * 195 193 sizeof(struct nat_entry_set); 196 194 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); 197 - for (i = 0; i <= UPDATE_INO; i++) 195 + for (i = 0; i <= ORPHAN_INO; i++) 198 196 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 199 197 si->cache_mem += atomic_read(&sbi->total_ext_tree) * 200 198 sizeof(struct extent_tree); ··· 218 216 list_for_each_entry(si, &f2fs_stat_list, stat_list) { 219 217 update_general_status(si->sbi); 220 218 221 - seq_printf(s, "\n=====[ partition info(%pg). #%d ]=====\n", 222 - si->sbi->sb->s_bdev, i++); 219 + seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n", 220 + si->sbi->sb->s_bdev, i++, 221 + f2fs_readonly(si->sbi->sb) ? 
"RO": "RW"); 223 222 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", 224 223 si->sit_area_segs, si->nat_area_segs); 225 224 seq_printf(s, "[SSA: %d] [MAIN: %d", ··· 240 237 si->inline_inode); 241 238 seq_printf(s, " - Inline_dentry Inode: %u\n", 242 239 si->inline_dir); 240 + seq_printf(s, " - Orphan Inode: %u\n", 241 + si->orphans); 243 242 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", 244 243 si->main_area_segs, si->main_area_sections, 245 244 si->main_area_zones); ··· 300 295 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", 301 296 si->ext_tree, si->zombie_tree, si->ext_node); 302 297 seq_puts(s, "\nBalancing F2FS Async:\n"); 303 - seq_printf(s, " - inmem: %4d, wb: %4d\n", 304 - si->inmem_pages, si->wb_pages); 305 - seq_printf(s, " - nodes: %4d in %4d\n", 298 + seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n", 299 + si->inmem_pages, si->wb_bios); 300 + seq_printf(s, " - nodes: %4lld in %4d\n", 306 301 si->ndirty_node, si->node_pages); 307 - seq_printf(s, " - dents: %4d in dirs:%4d\n", 302 + seq_printf(s, " - dents: %4lld in dirs:%4d\n", 308 303 si->ndirty_dent, si->ndirty_dirs); 309 - seq_printf(s, " - datas: %4d in files:%4d\n", 304 + seq_printf(s, " - datas: %4lld in files:%4d\n", 310 305 si->ndirty_data, si->ndirty_files); 311 - seq_printf(s, " - meta: %4d in %4d\n", 306 + seq_printf(s, " - meta: %4lld in %4d\n", 312 307 si->ndirty_meta, si->meta_pages); 313 308 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", 314 309 si->dirty_nats, si->nats, si->dirty_sits, si->sits);
+69 -59
fs/f2fs/dir.c
··· 48 48 [F2FS_FT_SYMLINK] = DT_LNK, 49 49 }; 50 50 51 - #define S_SHIFT 12 52 51 static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { 53 52 [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, 54 53 [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, ··· 61 62 void set_de_type(struct f2fs_dir_entry *de, umode_t mode) 62 63 { 63 64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 65 + } 66 + 67 + unsigned char get_de_type(struct f2fs_dir_entry *de) 68 + { 69 + if (de->file_type < F2FS_FT_MAX) 70 + return f2fs_filetype_table[de->file_type]; 71 + return DT_UNKNOWN; 64 72 } 65 73 66 74 static unsigned long dir_block_index(unsigned int level, ··· 101 95 else 102 96 kunmap(dentry_page); 103 97 104 - /* 105 - * For the most part, it should be a bug when name_len is zero. 106 - * We stop here for figuring out where the bugs has occurred. 107 - */ 108 - f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); 109 98 return de; 110 99 } 111 100 ··· 125 124 126 125 de = &d->dentry[bit_pos]; 127 126 127 + if (unlikely(!de->name_len)) { 128 + bit_pos++; 129 + continue; 130 + } 131 + 128 132 /* encrypted case */ 129 133 de_name.name = d->filename[bit_pos]; 130 134 de_name.len = le16_to_cpu(de->name_len); ··· 146 140 if (max_slots && max_len > *max_slots) 147 141 *max_slots = max_len; 148 142 max_len = 0; 149 - 150 - /* remain bug on condition */ 151 - if (unlikely(!de->name_len)) 152 - d->max = -1; 153 143 154 144 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 155 145 } ··· 391 389 return page; 392 390 393 391 if (S_ISDIR(inode->i_mode)) { 392 + /* in order to handle error case */ 393 + get_page(page); 394 394 err = make_empty_dir(inode, dir, page); 395 - if (err) 396 - goto error; 395 + if (err) { 396 + lock_page(page); 397 + goto put_error; 398 + } 399 + put_page(page); 397 400 } 398 401 399 402 err = f2fs_init_acl(inode, dir, page, dpage); ··· 442 435 return page; 443 436 444 437 put_error: 445 - f2fs_put_page(page, 1); 446 - error: 447 - /* once the failed inode becomes a bad 
inode, i_mode is S_IFREG */ 438 + /* truncate empty dir pages */ 448 439 truncate_inode_pages(&inode->i_data, 0); 449 - truncate_blocks(inode, 0, false); 450 - remove_dirty_inode(inode); 451 - remove_inode_page(inode); 440 + 441 + clear_nlink(inode); 442 + update_inode(inode, page); 443 + f2fs_put_page(page, 1); 452 444 return ERR_PTR(err); 453 445 } 454 446 ··· 515 509 } 516 510 } 517 511 518 - /* 519 - * Caller should grab and release a rwsem by calling f2fs_lock_op() and 520 - * f2fs_unlock_op(). 521 - */ 522 - int __f2fs_add_link(struct inode *dir, const struct qstr *name, 512 + int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, 523 513 struct inode *inode, nid_t ino, umode_t mode) 524 514 { 525 515 unsigned int bit_pos; ··· 528 526 struct f2fs_dentry_block *dentry_blk = NULL; 529 527 struct f2fs_dentry_ptr d; 530 528 struct page *page = NULL; 531 - struct fscrypt_name fname; 532 - struct qstr new_name; 533 - int slots, err; 534 - 535 - err = fscrypt_setup_filename(dir, name, 0, &fname); 536 - if (err) 537 - return err; 538 - 539 - new_name.name = fname_name(&fname); 540 - new_name.len = fname_len(&fname); 541 - 542 - if (f2fs_has_inline_dentry(dir)) { 543 - err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); 544 - if (!err || err != -EAGAIN) 545 - goto out; 546 - else 547 - err = 0; 548 - } 529 + int slots, err = 0; 549 530 550 531 level = 0; 551 - slots = GET_DENTRY_SLOTS(new_name.len); 552 - dentry_hash = f2fs_dentry_hash(&new_name); 532 + slots = GET_DENTRY_SLOTS(new_name->len); 533 + dentry_hash = f2fs_dentry_hash(new_name); 553 534 554 535 current_depth = F2FS_I(dir)->i_current_depth; 555 536 if (F2FS_I(dir)->chash == dentry_hash) { ··· 541 556 } 542 557 543 558 start: 544 - if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { 545 - err = -ENOSPC; 546 - goto out; 547 - } 559 + #ifdef CONFIG_F2FS_FAULT_INJECTION 560 + if (time_to_inject(FAULT_DIR_DEPTH)) 561 + return -ENOSPC; 562 + #endif 563 + if (unlikely(current_depth 
== MAX_DIR_HASH_DEPTH)) 564 + return -ENOSPC; 548 565 549 566 /* Increase the depth, if required */ 550 567 if (level == current_depth) ··· 560 573 561 574 for (block = bidx; block <= (bidx + nblock - 1); block++) { 562 575 dentry_page = get_new_data_page(dir, NULL, block, true); 563 - if (IS_ERR(dentry_page)) { 564 - err = PTR_ERR(dentry_page); 565 - goto out; 566 - } 576 + if (IS_ERR(dentry_page)) 577 + return PTR_ERR(dentry_page); 567 578 568 579 dentry_blk = kmap(dentry_page); 569 580 bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, ··· 581 596 582 597 if (inode) { 583 598 down_write(&F2FS_I(inode)->i_sem); 584 - page = init_inode_metadata(inode, dir, &new_name, NULL); 599 + page = init_inode_metadata(inode, dir, new_name, NULL); 585 600 if (IS_ERR(page)) { 586 601 err = PTR_ERR(page); 587 602 goto fail; ··· 591 606 } 592 607 593 608 make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); 594 - f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); 609 + f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); 595 610 596 611 set_page_dirty(dentry_page); 597 612 ··· 613 628 } 614 629 kunmap(dentry_page); 615 630 f2fs_put_page(dentry_page, 1); 616 - out: 631 + 632 + return err; 633 + } 634 + 635 + /* 636 + * Caller should grab and release a rwsem by calling f2fs_lock_op() and 637 + * f2fs_unlock_op(). 
638 + */ 639 + int __f2fs_add_link(struct inode *dir, const struct qstr *name, 640 + struct inode *inode, nid_t ino, umode_t mode) 641 + { 642 + struct fscrypt_name fname; 643 + struct qstr new_name; 644 + int err; 645 + 646 + err = fscrypt_setup_filename(dir, name, 0, &fname); 647 + if (err) 648 + return err; 649 + 650 + new_name.name = fname_name(&fname); 651 + new_name.len = fname_len(&fname); 652 + 653 + err = -EAGAIN; 654 + if (f2fs_has_inline_dentry(dir)) 655 + err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); 656 + if (err == -EAGAIN) 657 + err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode); 658 + 617 659 fscrypt_free_filename(&fname); 618 660 f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); 619 661 return err; ··· 804 792 continue; 805 793 } 806 794 807 - if (de->file_type < F2FS_FT_MAX) 808 - d_type = f2fs_filetype_table[de->file_type]; 809 - else 810 - d_type = DT_UNKNOWN; 795 + d_type = get_de_type(de); 811 796 812 797 de_name.name = d->filename[bit_pos]; 813 798 de_name.len = le16_to_cpu(de->name_len); ··· 813 804 int save_len = fstr->len; 814 805 int ret; 815 806 816 - de_name.name = kmalloc(de_name.len, GFP_NOFS); 807 + de_name.name = f2fs_kmalloc(de_name.len, GFP_NOFS); 817 808 if (!de_name.name) 818 809 return false; 819 810 ··· 896 887 kunmap(dentry_page); 897 888 f2fs_put_page(dentry_page, 1); 898 889 } 890 + err = 0; 899 891 out: 900 892 fscrypt_fname_free_buffer(&fstr); 901 893 return err;
+1 -2
fs/f2fs/extent_cache.c
··· 196 196 if (!i_ext || !i_ext->len) 197 197 return false; 198 198 199 - set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 200 - le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); 199 + get_extent_info(&ei, i_ext); 201 200 202 201 write_lock(&et->lock); 203 202 if (atomic_read(&et->node_cnt))
+143 -54
fs/f2fs/f2fs.h
··· 37 37 } while (0) 38 38 #endif 39 39 40 + #ifdef CONFIG_F2FS_FAULT_INJECTION 41 + enum { 42 + FAULT_KMALLOC, 43 + FAULT_PAGE_ALLOC, 44 + FAULT_ALLOC_NID, 45 + FAULT_ORPHAN, 46 + FAULT_BLOCK, 47 + FAULT_DIR_DEPTH, 48 + FAULT_MAX, 49 + }; 50 + 51 + struct f2fs_fault_info { 52 + atomic_t inject_ops; 53 + unsigned int inject_rate; 54 + unsigned int inject_type; 55 + }; 56 + 57 + extern struct f2fs_fault_info f2fs_fault; 58 + extern char *fault_name[FAULT_MAX]; 59 + #define IS_FAULT_SET(type) (f2fs_fault.inject_type & (1 << (type))) 60 + 61 + static inline bool time_to_inject(int type) 62 + { 63 + if (!f2fs_fault.inject_rate) 64 + return false; 65 + if (type == FAULT_KMALLOC && !IS_FAULT_SET(type)) 66 + return false; 67 + else if (type == FAULT_PAGE_ALLOC && !IS_FAULT_SET(type)) 68 + return false; 69 + else if (type == FAULT_ALLOC_NID && !IS_FAULT_SET(type)) 70 + return false; 71 + else if (type == FAULT_ORPHAN && !IS_FAULT_SET(type)) 72 + return false; 73 + else if (type == FAULT_BLOCK && !IS_FAULT_SET(type)) 74 + return false; 75 + else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type)) 76 + return false; 77 + 78 + atomic_inc(&f2fs_fault.inject_ops); 79 + if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) { 80 + atomic_set(&f2fs_fault.inject_ops, 0); 81 + printk("%sF2FS-fs : inject %s in %pF\n", 82 + KERN_INFO, 83 + fault_name[type], 84 + __builtin_return_address(0)); 85 + return true; 86 + } 87 + return false; 88 + } 89 + #endif 90 + 40 91 /* 41 92 * For mount options 42 93 */ ··· 107 56 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 108 57 #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 109 58 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 59 + #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 110 60 111 61 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 112 62 #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) ··· 211 159 struct inode *inode; /* vfs inode pointer */ 212 160 block_t blkaddr; /* block address 
locating the last fsync */ 213 161 block_t last_dentry; /* block address locating the last dentry */ 214 - block_t last_inode; /* block address locating the last inode */ 215 162 }; 216 163 217 164 #define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) ··· 436 385 /* Use below internally in f2fs*/ 437 386 unsigned long flags; /* use to pass per-file flags */ 438 387 struct rw_semaphore i_sem; /* protect fi info */ 439 - atomic_t dirty_pages; /* # of dirty pages */ 388 + struct percpu_counter dirty_pages; /* # of dirty pages */ 440 389 f2fs_hash_t chash; /* hash value of given file name */ 441 390 unsigned int clevel; /* maximum level of given file name */ 442 391 nid_t i_xattr_nid; /* node id that contains xattrs */ ··· 449 398 }; 450 399 451 400 static inline void get_extent_info(struct extent_info *ext, 452 - struct f2fs_extent i_ext) 401 + struct f2fs_extent *i_ext) 453 402 { 454 - ext->fofs = le32_to_cpu(i_ext.fofs); 455 - ext->blk = le32_to_cpu(i_ext.blk); 456 - ext->len = le32_to_cpu(i_ext.len); 403 + ext->fofs = le32_to_cpu(i_ext->fofs); 404 + ext->blk = le32_to_cpu(i_ext->blk); 405 + ext->len = le32_to_cpu(i_ext->len); 457 406 } 458 407 459 408 static inline void set_raw_extent(struct extent_info *ext, ··· 650 599 * dirty dentry blocks, dirty node blocks, and dirty meta blocks. 
651 600 */ 652 601 enum count_type { 653 - F2FS_WRITEBACK, 654 602 F2FS_DIRTY_DENTS, 655 603 F2FS_DIRTY_DATA, 656 604 F2FS_DIRTY_NODES, ··· 722 672 SBI_IS_CLOSE, /* specify unmounting */ 723 673 SBI_NEED_FSCK, /* need fsck.f2fs to fix */ 724 674 SBI_POR_DOING, /* recovery is doing or not */ 675 + SBI_NEED_SB_WRITE, /* need to recover superblock */ 725 676 }; 726 677 727 678 enum { ··· 731 680 MAX_TIME, 732 681 }; 733 682 683 + #ifdef CONFIG_F2FS_FS_ENCRYPTION 684 + #define F2FS_KEY_DESC_PREFIX "f2fs:" 685 + #define F2FS_KEY_DESC_PREFIX_SIZE 5 686 + #endif 734 687 struct f2fs_sb_info { 735 688 struct super_block *sb; /* pointer to VFS super block */ 736 689 struct proc_dir_entry *s_proc; /* proc entry */ ··· 742 687 int valid_super_block; /* valid super block no */ 743 688 int s_flag; /* flags for sbi */ 744 689 690 + #ifdef CONFIG_F2FS_FS_ENCRYPTION 691 + u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE]; 692 + u8 key_prefix_size; 693 + #endif 745 694 /* for node-related operations */ 746 695 struct f2fs_nm_info *nm_info; /* node manager */ 747 696 struct inode *node_inode; /* cache node blocks */ ··· 801 742 unsigned int total_sections; /* total section count */ 802 743 unsigned int total_node_count; /* total node block count */ 803 744 unsigned int total_valid_node_count; /* valid node block count */ 804 - unsigned int total_valid_inode_count; /* valid inode count */ 805 745 loff_t max_file_blocks; /* max block index of file */ 806 746 int active_logs; /* # of active logs */ 807 747 int dir_level; /* directory level */ 808 748 809 749 block_t user_block_count; /* # of user blocks */ 810 750 block_t total_valid_block_count; /* # of valid blocks */ 811 - block_t alloc_valid_block_count; /* # of allocated blocks */ 812 751 block_t discard_blks; /* discard command candidats */ 813 752 block_t last_valid_block_count; /* for recovery */ 814 753 u32 s_next_generation; /* for NFS support */ 815 - atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ 754 + atomic_t 
nr_wb_bios; /* # of writeback bios */ 755 + 756 + /* # of pages, see count_type */ 757 + struct percpu_counter nr_pages[NR_COUNT_TYPE]; 758 + /* # of allocated blocks */ 759 + struct percpu_counter alloc_valid_block_count; 760 + 761 + /* valid inode count */ 762 + struct percpu_counter total_valid_inode_count; 816 763 817 764 struct f2fs_mount_info mount_opt; /* mount options */ 818 765 ··· 1120 1055 } 1121 1056 1122 1057 static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 1123 - struct inode *inode, blkcnt_t count) 1058 + struct inode *inode, blkcnt_t *count) 1124 1059 { 1125 1060 block_t valid_block_count; 1126 1061 1127 1062 spin_lock(&sbi->stat_lock); 1128 - valid_block_count = 1129 - sbi->total_valid_block_count + (block_t)count; 1130 - if (unlikely(valid_block_count > sbi->user_block_count)) { 1063 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1064 + if (time_to_inject(FAULT_BLOCK)) { 1131 1065 spin_unlock(&sbi->stat_lock); 1132 1066 return false; 1133 1067 } 1134 - inode->i_blocks += count; 1135 - sbi->total_valid_block_count = valid_block_count; 1136 - sbi->alloc_valid_block_count += (block_t)count; 1068 + #endif 1069 + valid_block_count = 1070 + sbi->total_valid_block_count + (block_t)(*count); 1071 + if (unlikely(valid_block_count > sbi->user_block_count)) { 1072 + *count = sbi->user_block_count - sbi->total_valid_block_count; 1073 + if (!*count) { 1074 + spin_unlock(&sbi->stat_lock); 1075 + return false; 1076 + } 1077 + } 1078 + /* *count can be recalculated */ 1079 + inode->i_blocks += *count; 1080 + sbi->total_valid_block_count = 1081 + sbi->total_valid_block_count + (block_t)(*count); 1137 1082 spin_unlock(&sbi->stat_lock); 1083 + 1084 + percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); 1138 1085 return true; 1139 1086 } 1140 1087 ··· 1164 1087 1165 1088 static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) 1166 1089 { 1167 - atomic_inc(&sbi->nr_pages[count_type]); 1090 + 
percpu_counter_inc(&sbi->nr_pages[count_type]); 1168 1091 set_sbi_flag(sbi, SBI_IS_DIRTY); 1169 1092 } 1170 1093 1171 1094 static inline void inode_inc_dirty_pages(struct inode *inode) 1172 1095 { 1173 - atomic_inc(&F2FS_I(inode)->dirty_pages); 1096 + percpu_counter_inc(&F2FS_I(inode)->dirty_pages); 1174 1097 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1175 1098 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1176 1099 } 1177 1100 1178 1101 static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 1179 1102 { 1180 - atomic_dec(&sbi->nr_pages[count_type]); 1103 + percpu_counter_dec(&sbi->nr_pages[count_type]); 1181 1104 } 1182 1105 1183 1106 static inline void inode_dec_dirty_pages(struct inode *inode) ··· 1186 1109 !S_ISLNK(inode->i_mode)) 1187 1110 return; 1188 1111 1189 - atomic_dec(&F2FS_I(inode)->dirty_pages); 1112 + percpu_counter_dec(&F2FS_I(inode)->dirty_pages); 1190 1113 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1191 1114 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1192 1115 } 1193 1116 1194 - static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) 1117 + static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) 1195 1118 { 1196 - return atomic_read(&sbi->nr_pages[count_type]); 1119 + return percpu_counter_sum_positive(&sbi->nr_pages[count_type]); 1197 1120 } 1198 1121 1199 - static inline int get_dirty_pages(struct inode *inode) 1122 + static inline s64 get_dirty_pages(struct inode *inode) 1200 1123 { 1201 - return atomic_read(&F2FS_I(inode)->dirty_pages); 1124 + return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages); 1202 1125 } 1203 1126 1204 1127 static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 1205 1128 { 1206 1129 unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; 1207 - return ((get_pages(sbi, block_type) + pages_per_sec - 1) 1208 - >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; 1130 + unsigned int segs = (get_pages(sbi, block_type) + 
pages_per_sec - 1) >> 1131 + sbi->log_blocks_per_seg; 1132 + 1133 + return segs / sbi->segs_per_sec; 1209 1134 } 1210 1135 1211 1136 static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) ··· 1296 1217 if (inode) 1297 1218 inode->i_blocks++; 1298 1219 1299 - sbi->alloc_valid_block_count++; 1300 1220 sbi->total_valid_node_count++; 1301 1221 sbi->total_valid_block_count++; 1302 1222 spin_unlock(&sbi->stat_lock); 1303 1223 1224 + percpu_counter_inc(&sbi->alloc_valid_block_count); 1304 1225 return true; 1305 1226 } 1306 1227 ··· 1327 1248 1328 1249 static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 1329 1250 { 1330 - spin_lock(&sbi->stat_lock); 1331 - f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count); 1332 - sbi->total_valid_inode_count++; 1333 - spin_unlock(&sbi->stat_lock); 1251 + percpu_counter_inc(&sbi->total_valid_inode_count); 1334 1252 } 1335 1253 1336 1254 static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) 1337 1255 { 1338 - spin_lock(&sbi->stat_lock); 1339 - f2fs_bug_on(sbi, !sbi->total_valid_inode_count); 1340 - sbi->total_valid_inode_count--; 1341 - spin_unlock(&sbi->stat_lock); 1256 + percpu_counter_dec(&sbi->total_valid_inode_count); 1342 1257 } 1343 1258 1344 - static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 1259 + static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) 1345 1260 { 1346 - return sbi->total_valid_inode_count; 1261 + return percpu_counter_sum_positive(&sbi->total_valid_inode_count); 1347 1262 } 1348 1263 1349 1264 static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, 1350 1265 pgoff_t index, bool for_write) 1351 1266 { 1267 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1268 + struct page *page = find_lock_page(mapping, index); 1269 + if (page) 1270 + return page; 1271 + 1272 + if (time_to_inject(FAULT_PAGE_ALLOC)) 1273 + return NULL; 1274 + #endif 1352 1275 if (!for_write) 1353 1276 return grab_cache_page(mapping, index); 1354 1277 
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); ··· 1516 1435 FI_NO_ALLOC, /* should not allocate any blocks */ 1517 1436 FI_FREE_NID, /* free allocated nide */ 1518 1437 FI_UPDATE_DIR, /* should update inode block for consistency */ 1519 - FI_DELAY_IPUT, /* used for the recovery */ 1520 1438 FI_NO_EXTENT, /* not to use the extent cache */ 1521 1439 FI_INLINE_XATTR, /* used for inline xattr */ 1522 1440 FI_INLINE_DATA, /* used for inline data*/ ··· 1698 1618 return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1699 1619 } 1700 1620 1701 - static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) 1702 - { 1703 - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1704 - sbi->sb->s_flags |= MS_RDONLY; 1705 - } 1706 - 1707 1621 static inline bool is_dot_dotdot(const struct qstr *str) 1708 1622 { 1709 1623 if (str->len == 1 && str->name[0] == '.') ··· 1716 1642 return false; 1717 1643 1718 1644 return S_ISREG(inode->i_mode); 1645 + } 1646 + 1647 + static inline void *f2fs_kmalloc(size_t size, gfp_t flags) 1648 + { 1649 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1650 + if (time_to_inject(FAULT_KMALLOC)) 1651 + return NULL; 1652 + #endif 1653 + return kmalloc(size, flags); 1719 1654 } 1720 1655 1721 1656 static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) ··· 1793 1710 */ 1794 1711 extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; 1795 1712 void set_de_type(struct f2fs_dir_entry *, umode_t); 1796 - 1713 + unsigned char get_de_type(struct f2fs_dir_entry *); 1797 1714 struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, 1798 1715 f2fs_hash_t, int *, struct f2fs_dentry_ptr *); 1799 1716 bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, ··· 1814 1731 int update_dent_inode(struct inode *, struct inode *, const struct qstr *); 1815 1732 void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, 1816 1733 const struct qstr *, f2fs_hash_t , unsigned int); 1734 + int f2fs_add_regular_entry(struct inode *, 
const struct qstr *, 1735 + struct inode *, nid_t, umode_t); 1817 1736 int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, 1818 1737 umode_t); 1819 1738 void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, ··· 1866 1781 struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1867 1782 struct page *get_node_page_ra(struct page *, int); 1868 1783 void sync_inode_page(struct dnode_of_data *); 1869 - int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); 1784 + void move_node_page(struct page *, int); 1785 + int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, 1786 + bool); 1787 + int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); 1870 1788 bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1871 1789 void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1872 1790 void alloc_nid_failed(struct f2fs_sb_info *, nid_t); ··· 1931 1843 /* 1932 1844 * checkpoint.c 1933 1845 */ 1846 + void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool); 1934 1847 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1935 1848 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1936 1849 struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); ··· 1941 1852 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1942 1853 void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); 1943 1854 void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); 1944 - void release_ino_entry(struct f2fs_sb_info *); 1855 + void release_ino_entry(struct f2fs_sb_info *, bool); 1945 1856 bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1946 1857 int acquire_orphan_inode(struct f2fs_sb_info *); 1947 1858 void release_orphan_inode(struct f2fs_sb_info *); ··· 1950 1861 int recover_orphan_inodes(struct f2fs_sb_info *); 1951 1862 int get_valid_checkpoint(struct f2fs_sb_info *); 1952 1863 void update_dirty_page(struct inode *, struct page *); 1953 - 
void add_dirty_dir_inode(struct inode *); 1954 1864 void remove_dirty_inode(struct inode *); 1955 1865 int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); 1956 1866 int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); ··· 1968 1880 void f2fs_submit_page_mbio(struct f2fs_io_info *); 1969 1881 void set_data_blkaddr(struct dnode_of_data *); 1970 1882 void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); 1883 + int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); 1971 1884 int reserve_new_block(struct dnode_of_data *); 1972 1885 int f2fs_get_block(struct dnode_of_data *, pgoff_t); 1973 1886 ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); ··· 1995 1906 /* 1996 1907 * recovery.c 1997 1908 */ 1998 - int recover_fsync_data(struct f2fs_sb_info *); 1909 + int recover_fsync_data(struct f2fs_sb_info *, bool); 1999 1910 bool space_for_roll_forward(struct f2fs_sb_info *); 2000 1911 2001 1912 /* ··· 2010 1921 unsigned long long hit_largest, hit_cached, hit_rbtree; 2011 1922 unsigned long long hit_total, total_ext; 2012 1923 int ext_tree, zombie_tree, ext_node; 2013 - int ndirty_node, ndirty_meta; 2014 - int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; 1924 + s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; 1925 + unsigned int ndirty_dirs, ndirty_files; 2015 1926 int nats, dirty_nats, sits, dirty_sits, fnids; 2016 1927 int total_count, utilization; 2017 - int bg_gc, inmem_pages, wb_pages; 2018 - int inline_xattr, inline_inode, inline_dir; 1928 + int bg_gc, wb_bios; 1929 + int inline_xattr, inline_inode, inline_dir, orphans; 2019 1930 unsigned int valid_count, valid_node_count, valid_inode_count; 2020 1931 unsigned int bimodal, avg_vblocks; 2021 1932 int util_free, util_valid, util_invalid;
+223 -90
fs/f2fs/file.c
··· 182 182 } 183 183 } 184 184 185 - int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 185 + static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 186 + int datasync, bool atomic) 186 187 { 187 188 struct inode *inode = file->f_mapping->host; 188 189 struct f2fs_inode_info *fi = F2FS_I(inode); ··· 257 256 goto out; 258 257 } 259 258 sync_nodes: 260 - sync_node_pages(sbi, ino, &wbc); 259 + ret = fsync_node_pages(sbi, ino, &wbc, atomic); 260 + if (ret) 261 + goto out; 261 262 262 263 /* if cp_error was enabled, we should avoid infinite loop */ 263 264 if (unlikely(f2fs_cp_error(sbi))) { ··· 289 286 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 290 287 f2fs_trace_ios(NULL, 1); 291 288 return ret; 289 + } 290 + 291 + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 292 + { 293 + return f2fs_do_sync_file(file, start, end, datasync, false); 292 294 } 293 295 294 296 static pgoff_t __get_first_dirty_index(struct address_space *mapping, ··· 563 555 564 556 free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); 565 557 558 + if (free_from >= sbi->max_file_blocks) 559 + goto free_partial; 560 + 566 561 if (lock) 567 562 f2fs_lock_op(sbi); 568 563 ··· 584 573 } 585 574 586 575 set_new_dnode(&dn, inode, ipage, NULL, 0); 587 - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 576 + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 588 577 if (err) { 589 578 if (err == -ENOENT) 590 579 goto free_next; ··· 607 596 out: 608 597 if (lock) 609 598 f2fs_unlock_op(sbi); 610 - 599 + free_partial: 611 600 /* lastly zero out the first data page */ 612 601 if (!err) 613 602 err = truncate_partial_data_page(inode, from, truncate_page); ··· 997 986 return ret; 998 987 } 999 988 989 + static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 990 + pgoff_t end) 991 + { 992 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 993 + pgoff_t index = start; 994 + unsigned int 
ofs_in_node = dn->ofs_in_node; 995 + blkcnt_t count = 0; 996 + int ret; 997 + 998 + for (; index < end; index++, dn->ofs_in_node++) { 999 + if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR) 1000 + count++; 1001 + } 1002 + 1003 + dn->ofs_in_node = ofs_in_node; 1004 + ret = reserve_new_blocks(dn, count); 1005 + if (ret) 1006 + return ret; 1007 + 1008 + dn->ofs_in_node = ofs_in_node; 1009 + for (index = start; index < end; index++, dn->ofs_in_node++) { 1010 + dn->data_blkaddr = 1011 + datablock_addr(dn->node_page, dn->ofs_in_node); 1012 + /* 1013 + * reserve_new_blocks will not guarantee entire block 1014 + * allocation. 1015 + */ 1016 + if (dn->data_blkaddr == NULL_ADDR) { 1017 + ret = -ENOSPC; 1018 + break; 1019 + } 1020 + if (dn->data_blkaddr != NEW_ADDR) { 1021 + invalidate_blocks(sbi, dn->data_blkaddr); 1022 + dn->data_blkaddr = NEW_ADDR; 1023 + set_data_blkaddr(dn); 1024 + } 1025 + } 1026 + 1027 + f2fs_update_extent_cache_range(dn, start, 0, index - start); 1028 + 1029 + return ret; 1030 + } 1031 + 1000 1032 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1001 1033 int mode) 1002 1034 { ··· 1090 1036 (loff_t)pg_start << PAGE_SHIFT); 1091 1037 } 1092 1038 1093 - for (index = pg_start; index < pg_end; index++) { 1039 + for (index = pg_start; index < pg_end;) { 1094 1040 struct dnode_of_data dn; 1095 - struct page *ipage; 1041 + unsigned int end_offset; 1042 + pgoff_t end; 1096 1043 1097 1044 f2fs_lock_op(sbi); 1098 1045 1099 - ipage = get_node_page(sbi, inode->i_ino); 1100 - if (IS_ERR(ipage)) { 1101 - ret = PTR_ERR(ipage); 1102 - f2fs_unlock_op(sbi); 1103 - goto out; 1104 - } 1105 - 1106 - set_new_dnode(&dn, inode, ipage, NULL, 0); 1107 - ret = f2fs_reserve_block(&dn, index); 1046 + set_new_dnode(&dn, inode, NULL, NULL, 0); 1047 + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); 1108 1048 if (ret) { 1109 1049 f2fs_unlock_op(sbi); 1110 1050 goto out; 1111 1051 } 1112 1052 1113 - if (dn.data_blkaddr != NEW_ADDR) { 1114 - 
invalidate_blocks(sbi, dn.data_blkaddr); 1115 - f2fs_update_data_blkaddr(&dn, NEW_ADDR); 1116 - } 1053 + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 1054 + end = min(pg_end, end_offset - dn.ofs_in_node + index); 1055 + 1056 + ret = f2fs_do_zero_range(&dn, index, end); 1117 1057 f2fs_put_dnode(&dn); 1118 1058 f2fs_unlock_op(sbi); 1059 + if (ret) 1060 + goto out; 1119 1061 1062 + index = end; 1120 1063 new_size = max_t(loff_t, new_size, 1121 - (loff_t)(index + 1) << PAGE_SHIFT); 1064 + (loff_t)index << PAGE_SHIFT); 1122 1065 } 1123 1066 1124 1067 if (off_end) { ··· 1198 1147 loff_t len, int mode) 1199 1148 { 1200 1149 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1201 - pgoff_t index, pg_start, pg_end; 1150 + struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; 1151 + pgoff_t pg_end; 1202 1152 loff_t new_size = i_size_read(inode); 1203 - loff_t off_start, off_end; 1204 - int ret = 0; 1153 + loff_t off_end; 1154 + int ret; 1205 1155 1206 1156 ret = inode_newsize_ok(inode, (len + offset)); 1207 1157 if (ret) ··· 1214 1162 1215 1163 f2fs_balance_fs(sbi, true); 1216 1164 1217 - pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1218 - pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1219 - 1220 - off_start = offset & (PAGE_SIZE - 1); 1165 + pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1221 1166 off_end = (offset + len) & (PAGE_SIZE - 1); 1222 1167 1223 - f2fs_lock_op(sbi); 1168 + map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT; 1169 + map.m_len = pg_end - map.m_lblk; 1170 + if (off_end) 1171 + map.m_len++; 1224 1172 1225 - for (index = pg_start; index <= pg_end; index++) { 1226 - struct dnode_of_data dn; 1173 + ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); 1174 + if (ret) { 1175 + pgoff_t last_off; 1227 1176 1228 - if (index == pg_end && !off_end) 1229 - goto noalloc; 1177 + if (!map.m_len) 1178 + return ret; 1230 1179 1231 - set_new_dnode(&dn, inode, NULL, NULL, 0); 1232 - ret = f2fs_reserve_block(&dn, 
index); 1233 - if (ret) 1234 - break; 1235 - noalloc: 1236 - if (pg_start == pg_end) 1237 - new_size = offset + len; 1238 - else if (index == pg_start && off_start) 1239 - new_size = (loff_t)(index + 1) << PAGE_SHIFT; 1240 - else if (index == pg_end) 1241 - new_size = ((loff_t)index << PAGE_SHIFT) + 1242 - off_end; 1243 - else 1244 - new_size += PAGE_SIZE; 1180 + last_off = map.m_lblk + map.m_len - 1; 1181 + 1182 + /* update new size to the failed position */ 1183 + new_size = (last_off == pg_end) ? offset + len: 1184 + (loff_t)(last_off + 1) << PAGE_SHIFT; 1185 + } else { 1186 + new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1245 1187 } 1246 1188 1247 - if (!(mode & FALLOC_FL_KEEP_SIZE) && 1248 - i_size_read(inode) < new_size) { 1189 + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { 1249 1190 i_size_write(inode, new_size); 1250 1191 mark_inode_dirty(inode); 1251 1192 update_inode_page(inode); 1252 1193 } 1253 - f2fs_unlock_op(sbi); 1254 1194 1255 1195 return ret; 1256 1196 } ··· 1298 1254 1299 1255 static int f2fs_release_file(struct inode *inode, struct file *filp) 1300 1256 { 1257 + /* 1258 + * f2fs_relase_file is called at every close calls. So we should 1259 + * not drop any inmemory pages by close called by other process. 
1260 + */ 1261 + if (!(filp->f_mode & FMODE_WRITE) || 1262 + atomic_read(&inode->i_writecount) != 1) 1263 + return 0; 1264 + 1301 1265 /* some remained atomic pages should discarded */ 1302 1266 if (f2fs_is_atomic_file(inode)) 1303 1267 drop_inmem_pages(inode); 1304 1268 if (f2fs_is_volatile_file(inode)) { 1269 + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1305 1270 set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); 1306 1271 filemap_fdatawrite(inode->i_mapping); 1307 1272 clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); ··· 1347 1294 unsigned int oldflags; 1348 1295 int ret; 1349 1296 1297 + if (!inode_owner_or_capable(inode)) 1298 + return -EACCES; 1299 + 1300 + if (get_user(flags, (int __user *)arg)) 1301 + return -EFAULT; 1302 + 1350 1303 ret = mnt_want_write_file(filp); 1351 1304 if (ret) 1352 1305 return ret; 1353 - 1354 - if (!inode_owner_or_capable(inode)) { 1355 - ret = -EACCES; 1356 - goto out; 1357 - } 1358 - 1359 - if (get_user(flags, (int __user *)arg)) { 1360 - ret = -EFAULT; 1361 - goto out; 1362 - } 1363 1306 1364 1307 flags = f2fs_mask_flags(inode->i_mode, flags); 1365 1308 ··· 1399 1350 if (!inode_owner_or_capable(inode)) 1400 1351 return -EACCES; 1401 1352 1353 + ret = mnt_want_write_file(filp); 1354 + if (ret) 1355 + return ret; 1356 + 1357 + inode_lock(inode); 1358 + 1402 1359 if (f2fs_is_atomic_file(inode)) 1403 - return 0; 1360 + goto out; 1404 1361 1405 1362 ret = f2fs_convert_inline_inode(inode); 1406 1363 if (ret) 1407 - return ret; 1364 + goto out; 1408 1365 1409 1366 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1410 1367 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1411 1368 1412 - return 0; 1369 + if (!get_dirty_pages(inode)) 1370 + goto out; 1371 + 1372 + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, 1373 + "Unexpected flush for atomic writes: ino=%lu, npages=%lld", 1374 + inode->i_ino, get_dirty_pages(inode)); 1375 + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 1376 + if (ret) 1377 + 
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1378 + out: 1379 + inode_unlock(inode); 1380 + mnt_drop_write_file(filp); 1381 + return ret; 1413 1382 } 1414 1383 1415 1384 static int f2fs_ioc_commit_atomic_write(struct file *filp) ··· 1438 1371 if (!inode_owner_or_capable(inode)) 1439 1372 return -EACCES; 1440 1373 1441 - if (f2fs_is_volatile_file(inode)) 1442 - return 0; 1443 - 1444 1374 ret = mnt_want_write_file(filp); 1445 1375 if (ret) 1446 1376 return ret; 1377 + 1378 + inode_lock(inode); 1379 + 1380 + if (f2fs_is_volatile_file(inode)) 1381 + goto err_out; 1447 1382 1448 1383 if (f2fs_is_atomic_file(inode)) { 1449 1384 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); ··· 1456 1387 } 1457 1388 } 1458 1389 1459 - ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1390 + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 1460 1391 err_out: 1392 + inode_unlock(inode); 1461 1393 mnt_drop_write_file(filp); 1462 1394 return ret; 1463 1395 } ··· 1471 1401 if (!inode_owner_or_capable(inode)) 1472 1402 return -EACCES; 1473 1403 1474 - if (f2fs_is_volatile_file(inode)) 1475 - return 0; 1476 - 1477 - ret = f2fs_convert_inline_inode(inode); 1404 + ret = mnt_want_write_file(filp); 1478 1405 if (ret) 1479 1406 return ret; 1480 1407 1408 + inode_lock(inode); 1409 + 1410 + if (f2fs_is_volatile_file(inode)) 1411 + goto out; 1412 + 1413 + ret = f2fs_convert_inline_inode(inode); 1414 + if (ret) 1415 + goto out; 1416 + 1481 1417 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1482 1418 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1483 - return 0; 1419 + out: 1420 + inode_unlock(inode); 1421 + mnt_drop_write_file(filp); 1422 + return ret; 1484 1423 } 1485 1424 1486 1425 static int f2fs_ioc_release_volatile_write(struct file *filp) 1487 1426 { 1488 1427 struct inode *inode = file_inode(filp); 1428 + int ret; 1489 1429 1490 1430 if (!inode_owner_or_capable(inode)) 1491 1431 return -EACCES; 1492 1432 1433 + ret = mnt_want_write_file(filp); 1434 + if (ret) 1435 + return ret; 1436 + 
1437 + inode_lock(inode); 1438 + 1493 1439 if (!f2fs_is_volatile_file(inode)) 1494 - return 0; 1440 + goto out; 1495 1441 1496 - if (!f2fs_is_first_block_written(inode)) 1497 - return truncate_partial_data_page(inode, 0, true); 1442 + if (!f2fs_is_first_block_written(inode)) { 1443 + ret = truncate_partial_data_page(inode, 0, true); 1444 + goto out; 1445 + } 1498 1446 1499 - return punch_hole(inode, 0, F2FS_BLKSIZE); 1447 + ret = punch_hole(inode, 0, F2FS_BLKSIZE); 1448 + out: 1449 + inode_unlock(inode); 1450 + mnt_drop_write_file(filp); 1451 + return ret; 1500 1452 } 1501 1453 1502 1454 static int f2fs_ioc_abort_volatile_write(struct file *filp) ··· 1533 1441 if (ret) 1534 1442 return ret; 1535 1443 1536 - if (f2fs_is_atomic_file(inode)) { 1537 - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1444 + inode_lock(inode); 1445 + 1446 + if (f2fs_is_atomic_file(inode)) 1538 1447 drop_inmem_pages(inode); 1539 - } 1540 1448 if (f2fs_is_volatile_file(inode)) { 1541 1449 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1542 - ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1450 + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 1543 1451 } 1452 + 1453 + inode_unlock(inode); 1544 1454 1545 1455 mnt_drop_write_file(filp); 1546 1456 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); ··· 1555 1461 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1556 1462 struct super_block *sb = sbi->sb; 1557 1463 __u32 in; 1464 + int ret; 1558 1465 1559 1466 if (!capable(CAP_SYS_ADMIN)) 1560 1467 return -EPERM; ··· 1563 1468 if (get_user(in, (__u32 __user *)arg)) 1564 1469 return -EFAULT; 1565 1470 1471 + ret = mnt_want_write_file(filp); 1472 + if (ret) 1473 + return ret; 1474 + 1566 1475 switch (in) { 1567 1476 case F2FS_GOING_DOWN_FULLSYNC: 1568 1477 sb = freeze_bdev(sb->s_bdev); 1569 1478 if (sb && !IS_ERR(sb)) { 1570 - f2fs_stop_checkpoint(sbi); 1479 + f2fs_stop_checkpoint(sbi, false); 1571 1480 thaw_bdev(sb->s_bdev, sb); 1572 1481 } 1573 1482 break; 1574 1483 case F2FS_GOING_DOWN_METASYNC: 
1575 1484 /* do checkpoint only */ 1576 1485 f2fs_sync_fs(sb, 1); 1577 - f2fs_stop_checkpoint(sbi); 1486 + f2fs_stop_checkpoint(sbi, false); 1578 1487 break; 1579 1488 case F2FS_GOING_DOWN_NOSYNC: 1580 - f2fs_stop_checkpoint(sbi); 1489 + f2fs_stop_checkpoint(sbi, false); 1581 1490 break; 1582 1491 case F2FS_GOING_DOWN_METAFLUSH: 1583 1492 sync_meta_pages(sbi, META, LONG_MAX); 1584 - f2fs_stop_checkpoint(sbi); 1493 + f2fs_stop_checkpoint(sbi, false); 1585 1494 break; 1586 1495 default: 1587 - return -EINVAL; 1496 + ret = -EINVAL; 1497 + goto out; 1588 1498 } 1589 1499 f2fs_update_time(sbi, REQ_TIME); 1590 - return 0; 1500 + out: 1501 + mnt_drop_write_file(filp); 1502 + return ret; 1591 1503 } 1592 1504 1593 1505 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) ··· 1615 1513 sizeof(range))) 1616 1514 return -EFAULT; 1617 1515 1516 + ret = mnt_want_write_file(filp); 1517 + if (ret) 1518 + return ret; 1519 + 1618 1520 range.minlen = max((unsigned int)range.minlen, 1619 1521 q->limits.discard_granularity); 1620 1522 ret = f2fs_trim_fs(F2FS_SB(sb), &range); 1523 + mnt_drop_write_file(filp); 1621 1524 if (ret < 0) 1622 1525 return ret; 1623 1526 ··· 1647 1540 { 1648 1541 struct fscrypt_policy policy; 1649 1542 struct inode *inode = file_inode(filp); 1543 + int ret; 1650 1544 1651 1545 if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, 1652 1546 sizeof(policy))) 1653 1547 return -EFAULT; 1654 1548 1549 + ret = mnt_want_write_file(filp); 1550 + if (ret) 1551 + return ret; 1552 + 1655 1553 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1656 - return fscrypt_process_policy(inode, &policy); 1554 + ret = fscrypt_process_policy(inode, &policy); 1555 + 1556 + mnt_drop_write_file(filp); 1557 + return ret; 1657 1558 } 1658 1559 1659 1560 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) ··· 1718 1603 struct inode *inode = file_inode(filp); 1719 1604 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1720 1605 __u32 sync; 1606 + 
int ret; 1721 1607 1722 1608 if (!capable(CAP_SYS_ADMIN)) 1723 1609 return -EPERM; ··· 1729 1613 if (f2fs_readonly(sbi->sb)) 1730 1614 return -EROFS; 1731 1615 1616 + ret = mnt_want_write_file(filp); 1617 + if (ret) 1618 + return ret; 1619 + 1732 1620 if (!sync) { 1733 - if (!mutex_trylock(&sbi->gc_mutex)) 1734 - return -EBUSY; 1621 + if (!mutex_trylock(&sbi->gc_mutex)) { 1622 + ret = -EBUSY; 1623 + goto out; 1624 + } 1735 1625 } else { 1736 1626 mutex_lock(&sbi->gc_mutex); 1737 1627 } 1738 1628 1739 - return f2fs_gc(sbi, sync); 1629 + ret = f2fs_gc(sbi, sync); 1630 + out: 1631 + mnt_drop_write_file(filp); 1632 + return ret; 1740 1633 } 1741 1634 1742 1635 static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) 1743 1636 { 1744 1637 struct inode *inode = file_inode(filp); 1745 1638 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1639 + int ret; 1746 1640 1747 1641 if (!capable(CAP_SYS_ADMIN)) 1748 1642 return -EPERM; ··· 1760 1634 if (f2fs_readonly(sbi->sb)) 1761 1635 return -EROFS; 1762 1636 1763 - return f2fs_sync_fs(sbi->sb, 1); 1637 + ret = mnt_want_write_file(filp); 1638 + if (ret) 1639 + return ret; 1640 + 1641 + ret = f2fs_sync_fs(sbi->sb, 1); 1642 + 1643 + mnt_drop_write_file(filp); 1644 + return ret; 1764 1645 } 1765 1646 1766 1647 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
+5 -22
fs/f2fs/gc.c
··· 96 96 dev_t dev = sbi->sb->s_bdev->bd_dev; 97 97 int err = 0; 98 98 99 - gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 99 + gc_th = f2fs_kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 100 100 if (!gc_th) { 101 101 err = -ENOMEM; 102 102 goto out; ··· 465 465 continue; 466 466 } 467 467 468 - /* set page dirty and write it */ 469 - if (gc_type == FG_GC) { 470 - f2fs_wait_on_page_writeback(node_page, NODE, true); 471 - set_page_dirty(node_page); 472 - } else { 473 - if (!PageWriteback(node_page)) 474 - set_page_dirty(node_page); 475 - } 476 - f2fs_put_page(node_page, 1); 468 + move_node_page(node_page, gc_type); 477 469 stat_inc_node_blk_count(sbi, 1, gc_type); 478 470 } 479 471 ··· 826 834 f2fs_put_page(sum_page, 0); 827 835 } 828 836 829 - if (gc_type == FG_GC) { 830 - if (type == SUM_TYPE_NODE) { 831 - struct writeback_control wbc = { 832 - .sync_mode = WB_SYNC_ALL, 833 - .nr_to_write = LONG_MAX, 834 - .for_reclaim = 0, 835 - }; 836 - sync_node_pages(sbi, 0, &wbc); 837 - } else { 838 - f2fs_submit_merged_bio(sbi, DATA, WRITE); 839 - } 840 - } 837 + if (gc_type == FG_GC) 838 + f2fs_submit_merged_bio(sbi, 839 + (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); 841 840 842 841 blk_finish_plug(&plug); 843 842
+103 -8
fs/f2fs/inline.c
··· 161 161 if (!f2fs_has_inline_data(inode)) 162 162 return 0; 163 163 164 - page = grab_cache_page(inode->i_mapping, 0); 164 + page = f2fs_grab_cache_page(inode->i_mapping, 0, false); 165 165 if (!page) 166 166 return -ENOMEM; 167 167 ··· 303 303 else 304 304 f2fs_put_page(ipage, 0); 305 305 306 - /* 307 - * For the most part, it should be a bug when name_len is zero. 308 - * We stop here for figuring out where the bugs has occurred. 309 - */ 310 - f2fs_bug_on(sbi, d.max < 0); 311 306 return de; 312 307 } 313 308 ··· 350 355 * NOTE: ipage is grabbed by caller, but if any error occurs, we should 351 356 * release ipage in this function. 352 357 */ 353 - static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 358 + static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, 354 359 struct f2fs_inline_dentry *inline_dentry) 355 360 { 356 361 struct page *page; ··· 358 363 struct f2fs_dentry_block *dentry_blk; 359 364 int err; 360 365 361 - page = grab_cache_page(dir->i_mapping, 0); 366 + page = f2fs_grab_cache_page(dir->i_mapping, 0, false); 362 367 if (!page) { 363 368 f2fs_put_page(ipage, 1); 364 369 return -ENOMEM; ··· 400 405 stat_dec_inline_dir(dir); 401 406 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 402 407 408 + F2FS_I(dir)->i_current_depth = 1; 403 409 if (i_size_read(dir) < PAGE_SIZE) { 404 410 i_size_write(dir, PAGE_SIZE); 405 411 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); ··· 410 414 out: 411 415 f2fs_put_page(page, 1); 412 416 return err; 417 + } 418 + 419 + static int f2fs_add_inline_entries(struct inode *dir, 420 + struct f2fs_inline_dentry *inline_dentry) 421 + { 422 + struct f2fs_dentry_ptr d; 423 + unsigned long bit_pos = 0; 424 + int err = 0; 425 + 426 + make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); 427 + 428 + while (bit_pos < d.max) { 429 + struct f2fs_dir_entry *de; 430 + struct qstr new_name; 431 + nid_t ino; 432 + umode_t fake_mode; 433 + 434 + if (!test_bit_le(bit_pos, d.bitmap)) { 435 + 
bit_pos++; 436 + continue; 437 + } 438 + 439 + de = &d.dentry[bit_pos]; 440 + 441 + if (unlikely(!de->name_len)) { 442 + bit_pos++; 443 + continue; 444 + } 445 + 446 + new_name.name = d.filename[bit_pos]; 447 + new_name.len = de->name_len; 448 + 449 + ino = le32_to_cpu(de->ino); 450 + fake_mode = get_de_type(de) << S_SHIFT; 451 + 452 + err = f2fs_add_regular_entry(dir, &new_name, NULL, 453 + ino, fake_mode); 454 + if (err) 455 + goto punch_dentry_pages; 456 + 457 + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 458 + } 459 + return 0; 460 + punch_dentry_pages: 461 + truncate_inode_pages(&dir->i_data, 0); 462 + truncate_blocks(dir, 0, false); 463 + remove_dirty_inode(dir); 464 + return err; 465 + } 466 + 467 + static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, 468 + struct f2fs_inline_dentry *inline_dentry) 469 + { 470 + struct f2fs_inline_dentry *backup_dentry; 471 + struct f2fs_inode_info *fi = F2FS_I(dir); 472 + int err; 473 + 474 + backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry), 475 + GFP_F2FS_ZERO); 476 + if (!backup_dentry) { 477 + f2fs_put_page(ipage, 1); 478 + return -ENOMEM; 479 + } 480 + 481 + memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); 482 + truncate_inline_inode(ipage, 0); 483 + 484 + unlock_page(ipage); 485 + 486 + err = f2fs_add_inline_entries(dir, backup_dentry); 487 + if (err) 488 + goto recover; 489 + 490 + lock_page(ipage); 491 + 492 + stat_dec_inline_dir(dir); 493 + clear_inode_flag(fi, FI_INLINE_DENTRY); 494 + update_inode(dir, ipage); 495 + kfree(backup_dentry); 496 + return 0; 497 + recover: 498 + lock_page(ipage); 499 + memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); 500 + fi->i_current_depth = 0; 501 + i_size_write(dir, MAX_INLINE_DATA); 502 + update_inode(dir, ipage); 503 + f2fs_put_page(ipage, 1); 504 + 505 + kfree(backup_dentry); 506 + return err; 507 + } 508 + 509 + static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 510 + struct f2fs_inline_dentry 
*inline_dentry) 511 + { 512 + if (!F2FS_I(dir)->i_dir_level) 513 + return f2fs_move_inline_dirents(dir, ipage, inline_dentry); 514 + else 515 + return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); 413 516 } 414 517 415 518 int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
+26 -40
fs/f2fs/inode.c
··· 283 283 cond_resched(); 284 284 goto retry; 285 285 } else if (err != -ENOENT) { 286 - f2fs_stop_checkpoint(sbi); 286 + f2fs_stop_checkpoint(sbi, false); 287 287 } 288 288 return 0; 289 289 } ··· 344 344 sb_start_intwrite(inode->i_sb); 345 345 set_inode_flag(fi, FI_NO_ALLOC); 346 346 i_size_write(inode, 0); 347 - 347 + retry: 348 348 if (F2FS_HAS_BLOCKS(inode)) 349 349 err = f2fs_truncate(inode, true); 350 350 ··· 352 352 f2fs_lock_op(sbi); 353 353 err = remove_inode_page(inode); 354 354 f2fs_unlock_op(sbi); 355 + } 356 + 357 + /* give more chances, if ENOMEM case */ 358 + if (err == -ENOMEM) { 359 + err = 0; 360 + goto retry; 355 361 } 356 362 357 363 sb_end_intwrite(inode->i_sb); ··· 374 368 if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) 375 369 add_ino_entry(sbi, inode->i_ino, UPDATE_INO); 376 370 if (is_inode_flag_set(fi, FI_FREE_NID)) { 377 - if (err && err != -ENOENT) 378 - alloc_nid_done(sbi, inode->i_ino); 379 - else 380 - alloc_nid_failed(sbi, inode->i_ino); 371 + alloc_nid_failed(sbi, inode->i_ino); 381 372 clear_inode_flag(fi, FI_FREE_NID); 382 373 } 383 - 384 - if (err && err != -ENOENT) { 385 - if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) { 386 - /* 387 - * get here because we failed to release resource 388 - * of inode previously, reminder our user to run fsck 389 - * for fixing. 
390 - */ 391 - set_sbi_flag(sbi, SBI_NEED_FSCK); 392 - f2fs_msg(sbi->sb, KERN_WARNING, 393 - "inode (ino:%lu) resource leak, run fsck " 394 - "to fix this issue!", inode->i_ino); 395 - } 396 - } 374 + f2fs_bug_on(sbi, err && 375 + !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); 397 376 out_clear: 398 377 fscrypt_put_encryption_info(inode, NULL); 399 378 clear_inode(inode); ··· 388 397 void handle_failed_inode(struct inode *inode) 389 398 { 390 399 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 391 - int err = 0; 400 + struct node_info ni; 392 401 393 - clear_nlink(inode); 394 - make_bad_inode(inode); 402 + /* don't make bad inode, since it becomes a regular file. */ 395 403 unlock_new_inode(inode); 396 404 397 - i_size_write(inode, 0); 398 - if (F2FS_HAS_BLOCKS(inode)) 399 - err = f2fs_truncate(inode, false); 400 - 401 - if (!err) 402 - err = remove_inode_page(inode); 403 - 404 405 /* 405 - * if we skip truncate_node in remove_inode_page bacause we failed 406 - * before, it's better to find another way to release resource of 407 - * this inode (e.g. valid block count, node block or nid). Here we 408 - * choose to add this inode to orphan list, so that we can call iput 409 - * for releasing in orphan recovery flow. 410 - * 411 406 * Note: we should add inode to orphan list before f2fs_unlock_op() 412 407 * so we can prevent losing this orphan when encoutering checkpoint 413 408 * and following suddenly power-off. 
414 409 */ 415 - if (err && err != -ENOENT) { 416 - err = acquire_orphan_inode(sbi); 417 - if (!err) 410 + get_node_info(sbi, inode->i_ino, &ni); 411 + 412 + if (ni.blk_addr != NULL_ADDR) { 413 + int err = acquire_orphan_inode(sbi); 414 + if (err) { 415 + set_sbi_flag(sbi, SBI_NEED_FSCK); 416 + f2fs_msg(sbi->sb, KERN_WARNING, 417 + "Too many orphan inodes, run fsck to fix."); 418 + } else { 418 419 add_orphan_inode(sbi, inode->i_ino); 420 + } 421 + alloc_nid_done(sbi, inode->i_ino); 422 + } else { 423 + set_inode_flag(F2FS_I(inode), FI_FREE_NID); 419 424 } 420 425 421 - set_inode_flag(F2FS_I(inode), FI_FREE_NID); 422 426 f2fs_unlock_op(sbi); 423 427 424 428 /* iput will drop the inode object */
+244 -72
fs/f2fs/node.c
··· 407 407 up_write(&nm_i->nat_tree_lock); 408 408 } 409 409 410 + /* 411 + * readahead MAX_RA_NODE number of node pages. 412 + */ 413 + static void ra_node_pages(struct page *parent, int start, int n) 414 + { 415 + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 416 + struct blk_plug plug; 417 + int i, end; 418 + nid_t nid; 419 + 420 + blk_start_plug(&plug); 421 + 422 + /* Then, try readahead for siblings of the desired node */ 423 + end = start + n; 424 + end = min(end, NIDS_PER_BLOCK); 425 + for (i = start; i < end; i++) { 426 + nid = get_nid(parent, i, false); 427 + ra_node_page(sbi, nid); 428 + } 429 + 430 + blk_finish_plug(&plug); 431 + } 432 + 410 433 pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) 411 434 { 412 435 const long direct_index = ADDRS_PER_INODE(dn->inode); ··· 730 707 return PTR_ERR(page); 731 708 } 732 709 710 + ra_node_pages(page, ofs, NIDS_PER_BLOCK); 711 + 733 712 rn = F2FS_NODE(page); 734 713 if (depth < 3) { 735 714 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { ··· 809 784 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 810 785 } 811 786 787 + ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); 788 + 812 789 /* free direct nodes linked to a partial indirect node */ 813 790 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 814 791 child_nid = get_nid(pages[idx], i, false); ··· 859 832 trace_f2fs_truncate_inode_blocks_enter(inode, from); 860 833 861 834 level = get_node_path(inode, from, offset, noffset); 862 - restart: 835 + 863 836 page = get_node_page(sbi, inode->i_ino); 864 837 if (IS_ERR(page)) { 865 838 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); ··· 923 896 if (offset[1] == 0 && 924 897 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { 925 898 lock_page(page); 926 - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 927 - f2fs_put_page(page, 1); 928 - goto restart; 929 - } 899 + BUG_ON(page->mapping != NODE_MAPPING(sbi)); 930 900 f2fs_wait_on_page_writeback(page, NODE, true); 931 901 
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 932 902 set_page_dirty(page); ··· 1022 998 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1023 999 return ERR_PTR(-EPERM); 1024 1000 1025 - page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 1001 + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); 1026 1002 if (!page) 1027 1003 return ERR_PTR(-ENOMEM); 1028 1004 ··· 1114 1090 if (apage) 1115 1091 return; 1116 1092 1117 - apage = grab_cache_page(NODE_MAPPING(sbi), nid); 1093 + apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); 1118 1094 if (!apage) 1119 1095 return; 1120 1096 1121 1097 err = read_node_page(apage, READA); 1122 1098 f2fs_put_page(apage, err ? 1 : 0); 1123 - } 1124 - 1125 - /* 1126 - * readahead MAX_RA_NODE number of node pages. 1127 - */ 1128 - static void ra_node_pages(struct page *parent, int start) 1129 - { 1130 - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 1131 - struct blk_plug plug; 1132 - int i, end; 1133 - nid_t nid; 1134 - 1135 - blk_start_plug(&plug); 1136 - 1137 - /* Then, try readahead for siblings of the desired node */ 1138 - end = start + MAX_RA_NODE; 1139 - end = min(end, NIDS_PER_BLOCK); 1140 - for (i = start; i < end; i++) { 1141 - nid = get_nid(parent, i, false); 1142 - ra_node_page(sbi, nid); 1143 - } 1144 - 1145 - blk_finish_plug(&plug); 1146 1099 } 1147 1100 1148 1101 static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, ··· 1132 1131 return ERR_PTR(-ENOENT); 1133 1132 f2fs_bug_on(sbi, check_nid_range(sbi, nid)); 1134 1133 repeat: 1135 - page = grab_cache_page(NODE_MAPPING(sbi), nid); 1134 + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); 1136 1135 if (!page) 1137 1136 return ERR_PTR(-ENOMEM); 1138 1137 ··· 1145 1144 } 1146 1145 1147 1146 if (parent) 1148 - ra_node_pages(parent, start + 1); 1147 + ra_node_pages(parent, start + 1, MAX_RA_NODE); 1149 1148 1150 1149 lock_page(page); 1151 1150 ··· 1197 1196 { 1198 1197 struct inode *inode; 1199 1198 struct page 
*page; 1199 + int ret; 1200 1200 1201 1201 /* should flush inline_data before evict_inode */ 1202 1202 inode = ilookup(sbi->sb, ino); 1203 1203 if (!inode) 1204 1204 return; 1205 1205 1206 - page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0); 1206 + page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); 1207 1207 if (!page) 1208 1208 goto iput_out; 1209 - 1210 - if (!trylock_page(page)) 1211 - goto release_out; 1212 1209 1213 1210 if (!PageUptodate(page)) 1214 1211 goto page_out; ··· 1217 1218 if (!clear_page_dirty_for_io(page)) 1218 1219 goto page_out; 1219 1220 1220 - if (!f2fs_write_inline_data(inode, page)) 1221 - inode_dec_dirty_pages(inode); 1222 - else 1221 + ret = f2fs_write_inline_data(inode, page); 1222 + inode_dec_dirty_pages(inode); 1223 + if (ret) 1223 1224 set_page_dirty(page); 1224 1225 page_out: 1225 - unlock_page(page); 1226 - release_out: 1227 - f2fs_put_page(page, 0); 1226 + f2fs_put_page(page, 1); 1228 1227 iput_out: 1229 1228 iput(inode); 1230 1229 } 1231 1230 1232 - int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1233 - struct writeback_control *wbc) 1231 + void move_node_page(struct page *node_page, int gc_type) 1232 + { 1233 + if (gc_type == FG_GC) { 1234 + struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); 1235 + struct writeback_control wbc = { 1236 + .sync_mode = WB_SYNC_ALL, 1237 + .nr_to_write = 1, 1238 + .for_reclaim = 0, 1239 + }; 1240 + 1241 + set_page_dirty(node_page); 1242 + f2fs_wait_on_page_writeback(node_page, NODE, true); 1243 + 1244 + f2fs_bug_on(sbi, PageWriteback(node_page)); 1245 + if (!clear_page_dirty_for_io(node_page)) 1246 + goto out_page; 1247 + 1248 + if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) 1249 + unlock_page(node_page); 1250 + goto release_page; 1251 + } else { 1252 + /* set page dirty and write it */ 1253 + if (!PageWriteback(node_page)) 1254 + set_page_dirty(node_page); 1255 + } 1256 + out_page: 1257 + unlock_page(node_page); 1258 + release_page: 1259 + 
f2fs_put_page(node_page, 0); 1260 + } 1261 + 1262 + static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) 1234 1263 { 1235 1264 pgoff_t index, end; 1236 1265 struct pagevec pvec; 1237 - int step = ino ? 2 : 0; 1266 + struct page *last_page = NULL; 1267 + 1268 + pagevec_init(&pvec, 0); 1269 + index = 0; 1270 + end = ULONG_MAX; 1271 + 1272 + while (index <= end) { 1273 + int i, nr_pages; 1274 + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1275 + PAGECACHE_TAG_DIRTY, 1276 + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1277 + if (nr_pages == 0) 1278 + break; 1279 + 1280 + for (i = 0; i < nr_pages; i++) { 1281 + struct page *page = pvec.pages[i]; 1282 + 1283 + if (unlikely(f2fs_cp_error(sbi))) { 1284 + f2fs_put_page(last_page, 0); 1285 + pagevec_release(&pvec); 1286 + return ERR_PTR(-EIO); 1287 + } 1288 + 1289 + if (!IS_DNODE(page) || !is_cold_node(page)) 1290 + continue; 1291 + if (ino_of_node(page) != ino) 1292 + continue; 1293 + 1294 + lock_page(page); 1295 + 1296 + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1297 + continue_unlock: 1298 + unlock_page(page); 1299 + continue; 1300 + } 1301 + if (ino_of_node(page) != ino) 1302 + goto continue_unlock; 1303 + 1304 + if (!PageDirty(page)) { 1305 + /* someone wrote it for us */ 1306 + goto continue_unlock; 1307 + } 1308 + 1309 + if (last_page) 1310 + f2fs_put_page(last_page, 0); 1311 + 1312 + get_page(page); 1313 + last_page = page; 1314 + unlock_page(page); 1315 + } 1316 + pagevec_release(&pvec); 1317 + cond_resched(); 1318 + } 1319 + return last_page; 1320 + } 1321 + 1322 + int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1323 + struct writeback_control *wbc, bool atomic) 1324 + { 1325 + pgoff_t index, end; 1326 + struct pagevec pvec; 1327 + int ret = 0; 1328 + struct page *last_page = NULL; 1329 + bool marked = false; 1330 + 1331 + if (atomic) { 1332 + last_page = last_fsync_dnode(sbi, ino); 1333 + if (IS_ERR_OR_NULL(last_page)) 1334 + return 
PTR_ERR_OR_ZERO(last_page); 1335 + } 1336 + retry: 1337 + pagevec_init(&pvec, 0); 1338 + index = 0; 1339 + end = ULONG_MAX; 1340 + 1341 + while (index <= end) { 1342 + int i, nr_pages; 1343 + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1344 + PAGECACHE_TAG_DIRTY, 1345 + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1346 + if (nr_pages == 0) 1347 + break; 1348 + 1349 + for (i = 0; i < nr_pages; i++) { 1350 + struct page *page = pvec.pages[i]; 1351 + 1352 + if (unlikely(f2fs_cp_error(sbi))) { 1353 + f2fs_put_page(last_page, 0); 1354 + pagevec_release(&pvec); 1355 + return -EIO; 1356 + } 1357 + 1358 + if (!IS_DNODE(page) || !is_cold_node(page)) 1359 + continue; 1360 + if (ino_of_node(page) != ino) 1361 + continue; 1362 + 1363 + lock_page(page); 1364 + 1365 + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1366 + continue_unlock: 1367 + unlock_page(page); 1368 + continue; 1369 + } 1370 + if (ino_of_node(page) != ino) 1371 + goto continue_unlock; 1372 + 1373 + if (!PageDirty(page) && page != last_page) { 1374 + /* someone wrote it for us */ 1375 + goto continue_unlock; 1376 + } 1377 + 1378 + f2fs_wait_on_page_writeback(page, NODE, true); 1379 + BUG_ON(PageWriteback(page)); 1380 + 1381 + if (!atomic || page == last_page) { 1382 + set_fsync_mark(page, 1); 1383 + if (IS_INODE(page)) 1384 + set_dentry_mark(page, 1385 + need_dentry_mark(sbi, ino)); 1386 + /* may be written by other thread */ 1387 + if (!PageDirty(page)) 1388 + set_page_dirty(page); 1389 + } 1390 + 1391 + if (!clear_page_dirty_for_io(page)) 1392 + goto continue_unlock; 1393 + 1394 + ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1395 + if (ret) { 1396 + unlock_page(page); 1397 + f2fs_put_page(last_page, 0); 1398 + break; 1399 + } 1400 + if (page == last_page) { 1401 + f2fs_put_page(page, 0); 1402 + marked = true; 1403 + break; 1404 + } 1405 + } 1406 + pagevec_release(&pvec); 1407 + cond_resched(); 1408 + 1409 + if (ret || marked) 1410 + break; 1411 + } 1412 + if (!ret && 
atomic && !marked) { 1413 + f2fs_msg(sbi->sb, KERN_DEBUG, 1414 + "Retry to write fsync mark: ino=%u, idx=%lx", 1415 + ino, last_page->index); 1416 + lock_page(last_page); 1417 + set_page_dirty(last_page); 1418 + unlock_page(last_page); 1419 + goto retry; 1420 + } 1421 + return ret ? -EIO: 0; 1422 + } 1423 + 1424 + int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) 1425 + { 1426 + pgoff_t index, end; 1427 + struct pagevec pvec; 1428 + int step = 0; 1238 1429 int nwritten = 0; 1239 1430 1240 1431 pagevec_init(&pvec, 0); ··· 1463 1274 if (step == 2 && (!IS_DNODE(page) || 1464 1275 !is_cold_node(page))) 1465 1276 continue; 1466 - 1467 - /* 1468 - * If an fsync mode, 1469 - * we should not skip writing node pages. 1470 - */ 1471 1277 lock_node: 1472 - if (ino && ino_of_node(page) == ino) 1473 - lock_page(page); 1474 - else if (!trylock_page(page)) 1278 + if (!trylock_page(page)) 1475 1279 continue; 1476 1280 1477 1281 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { ··· 1472 1290 unlock_page(page); 1473 1291 continue; 1474 1292 } 1475 - if (ino && ino_of_node(page) != ino) 1476 - goto continue_unlock; 1477 1293 1478 1294 if (!PageDirty(page)) { 1479 1295 /* someone wrote it for us */ ··· 1479 1299 } 1480 1300 1481 1301 /* flush inline_data */ 1482 - if (!ino && is_inline_node(page)) { 1302 + if (is_inline_node(page)) { 1483 1303 clear_inline_node(page); 1484 1304 unlock_page(page); 1485 1305 flush_inline_data(sbi, ino_of_node(page)); ··· 1492 1312 if (!clear_page_dirty_for_io(page)) 1493 1313 goto continue_unlock; 1494 1314 1495 - /* called by fsync() */ 1496 - if (ino && IS_DNODE(page)) { 1497 - set_fsync_mark(page, 1); 1498 - if (IS_INODE(page)) 1499 - set_dentry_mark(page, 1500 - need_dentry_mark(sbi, ino)); 1501 - nwritten++; 1502 - } else { 1503 - set_fsync_mark(page, 0); 1504 - set_dentry_mark(page, 0); 1505 - } 1315 + set_fsync_mark(page, 0); 1316 + set_dentry_mark(page, 0); 1506 1317 1507 1318 if 
(NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1508 1319 unlock_page(page); ··· 1641 1470 1642 1471 diff = nr_pages_to_write(sbi, NODE, wbc); 1643 1472 wbc->sync_mode = WB_SYNC_NONE; 1644 - sync_node_pages(sbi, 0, wbc); 1473 + sync_node_pages(sbi, wbc); 1645 1474 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1646 1475 return 0; 1647 1476 ··· 1695 1524 struct f2fs_nm_info *nm_i = NM_I(sbi); 1696 1525 struct free_nid *i; 1697 1526 struct nat_entry *ne; 1698 - bool allocated = false; 1699 1527 1700 1528 if (!available_free_memory(sbi, FREE_NIDS)) 1701 1529 return -1; ··· 1708 1538 ne = __lookup_nat_cache(nm_i, nid); 1709 1539 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1710 1540 nat_get_blkaddr(ne) != NULL_ADDR)) 1711 - allocated = true; 1712 - if (allocated) 1713 1541 return 0; 1714 1542 } 1715 1543 ··· 1840 1672 struct f2fs_nm_info *nm_i = NM_I(sbi); 1841 1673 struct free_nid *i = NULL; 1842 1674 retry: 1675 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1676 + if (time_to_inject(FAULT_ALLOC_NID)) 1677 + return false; 1678 + #endif 1843 1679 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) 1844 1680 return false; 1845 1681 ··· 2018 1846 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2019 1847 return -EINVAL; 2020 1848 2021 - ipage = grab_cache_page(NODE_MAPPING(sbi), ino); 1849 + ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); 2022 1850 if (!ipage) 2023 1851 return -ENOMEM; 2024 1852
+87 -62
fs/f2fs/recovery.c
··· 49 49 50 50 bool space_for_roll_forward(struct f2fs_sb_info *sbi) 51 51 { 52 - if (sbi->last_valid_block_count + sbi->alloc_valid_block_count 53 - > sbi->user_block_count) 52 + s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); 53 + 54 + if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) 54 55 return false; 55 56 return true; 56 57 } ··· 68 67 return NULL; 69 68 } 70 69 71 - static int recover_dentry(struct inode *inode, struct page *ipage) 70 + static struct fsync_inode_entry *add_fsync_inode(struct list_head *head, 71 + struct inode *inode) 72 + { 73 + struct fsync_inode_entry *entry; 74 + 75 + entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 76 + if (!entry) 77 + return NULL; 78 + 79 + entry->inode = inode; 80 + list_add_tail(&entry->list, head); 81 + 82 + return entry; 83 + } 84 + 85 + static void del_fsync_inode(struct fsync_inode_entry *entry) 86 + { 87 + iput(entry->inode); 88 + list_del(&entry->list); 89 + kmem_cache_free(fsync_entry_slab, entry); 90 + } 91 + 92 + static int recover_dentry(struct inode *inode, struct page *ipage, 93 + struct list_head *dir_list) 72 94 { 73 95 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 74 96 nid_t pino = le32_to_cpu(raw_inode->i_pino); ··· 99 75 struct qstr name; 100 76 struct page *page; 101 77 struct inode *dir, *einode; 78 + struct fsync_inode_entry *entry; 102 79 int err = 0; 103 80 104 - dir = f2fs_iget(inode->i_sb, pino); 105 - if (IS_ERR(dir)) { 106 - err = PTR_ERR(dir); 107 - goto out; 81 + entry = get_fsync_inode(dir_list, pino); 82 + if (!entry) { 83 + dir = f2fs_iget(inode->i_sb, pino); 84 + if (IS_ERR(dir)) { 85 + err = PTR_ERR(dir); 86 + goto out; 87 + } 88 + 89 + entry = add_fsync_inode(dir_list, dir); 90 + if (!entry) { 91 + err = -ENOMEM; 92 + iput(dir); 93 + goto out; 94 + } 108 95 } 109 96 110 - if (file_enc_name(inode)) { 111 - iput(dir); 97 + dir = entry->inode; 98 + 99 + if (file_enc_name(inode)) 112 100 return 0; 113 - } 114 101 115 102 name.len 
= le32_to_cpu(raw_inode->i_namelen); 116 103 name.name = raw_inode->i_name; ··· 129 94 if (unlikely(name.len > F2FS_NAME_LEN)) { 130 95 WARN_ON(1); 131 96 err = -ENAMETOOLONG; 132 - goto out_err; 97 + goto out; 133 98 } 134 99 retry: 135 100 de = f2fs_find_entry(dir, &name, &page); ··· 155 120 goto retry; 156 121 } 157 122 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); 158 - if (err) 159 - goto out_err; 160 - 161 - if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { 162 - iput(dir); 163 - } else { 164 - add_dirty_dir_inode(dir); 165 - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); 166 - } 167 123 168 124 goto out; 169 125 170 126 out_unmap_put: 171 127 f2fs_dentry_kunmap(dir, page); 172 128 f2fs_put_page(page, 0); 173 - out_err: 174 - iput(dir); 175 129 out: 176 130 f2fs_msg(inode->i_sb, KERN_NOTICE, 177 131 "%s: ino = %x, name = %s, dir = %lx, err = %d", ··· 222 198 { 223 199 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 224 200 struct curseg_info *curseg; 201 + struct inode *inode; 225 202 struct page *page = NULL; 226 203 block_t blkaddr; 227 204 int err = 0; ··· 230 205 /* get node pages in the current segment */ 231 206 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 232 207 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 233 - 234 - ra_meta_pages(sbi, blkaddr, 1, META_POR, true); 235 208 236 209 while (1) { 237 210 struct fsync_inode_entry *entry; ··· 256 233 break; 257 234 } 258 235 259 - /* add this fsync inode to the list */ 260 - entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 261 - if (!entry) { 262 - err = -ENOMEM; 263 - break; 264 - } 265 236 /* 266 237 * CP | dnode(F) | inode(DF) 267 238 * For this case, we should not give up now. 
268 239 */ 269 - entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 270 - if (IS_ERR(entry->inode)) { 271 - err = PTR_ERR(entry->inode); 272 - kmem_cache_free(fsync_entry_slab, entry); 240 + inode = f2fs_iget(sbi->sb, ino_of_node(page)); 241 + if (IS_ERR(inode)) { 242 + err = PTR_ERR(inode); 273 243 if (err == -ENOENT) { 274 244 err = 0; 275 245 goto next; 276 246 } 277 247 break; 278 248 } 279 - list_add_tail(&entry->list, head); 249 + 250 + /* add this fsync inode to the list */ 251 + entry = add_fsync_inode(head, inode); 252 + if (!entry) { 253 + err = -ENOMEM; 254 + iput(inode); 255 + break; 256 + } 280 257 } 281 258 entry->blkaddr = blkaddr; 282 259 283 - if (IS_INODE(page)) { 284 - entry->last_inode = blkaddr; 285 - if (is_dent_dnode(page)) 286 - entry->last_dentry = blkaddr; 287 - } 260 + if (IS_INODE(page) && is_dent_dnode(page)) 261 + entry->last_dentry = blkaddr; 288 262 next: 289 263 /* check next segment */ 290 264 blkaddr = next_blkaddr_of_node(page); ··· 297 277 { 298 278 struct fsync_inode_entry *entry, *tmp; 299 279 300 - list_for_each_entry_safe(entry, tmp, head, list) { 301 - iput(entry->inode); 302 - list_del(&entry->list); 303 - kmem_cache_free(fsync_entry_slab, entry); 304 - } 280 + list_for_each_entry_safe(entry, tmp, head, list) 281 + del_fsync_inode(entry); 305 282 } 306 283 307 284 static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, ··· 461 444 */ 462 445 if (dest == NEW_ADDR) { 463 446 truncate_data_blocks_range(&dn, 1); 464 - err = reserve_new_block(&dn); 465 - f2fs_bug_on(sbi, err); 447 + reserve_new_block(&dn); 466 448 continue; 467 449 } 468 450 ··· 470 454 471 455 if (src == NULL_ADDR) { 472 456 err = reserve_new_block(&dn); 457 + #ifdef CONFIG_F2FS_FAULT_INJECTION 458 + while (err) 459 + err = reserve_new_block(&dn); 460 + #endif 473 461 /* We should not get -ENOSPC */ 474 462 f2fs_bug_on(sbi, err); 475 463 } ··· 506 486 return err; 507 487 } 508 488 509 - static int recover_data(struct f2fs_sb_info *sbi, struct 
list_head *head) 489 + static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, 490 + struct list_head *dir_list) 510 491 { 511 492 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 512 493 struct curseg_info *curseg; ··· 534 513 break; 535 514 } 536 515 537 - entry = get_fsync_inode(head, ino_of_node(page)); 516 + entry = get_fsync_inode(inode_list, ino_of_node(page)); 538 517 if (!entry) 539 518 goto next; 540 519 /* ··· 542 521 * In this case, we can lose the latest inode(x). 543 522 * So, call recover_inode for the inode update. 544 523 */ 545 - if (entry->last_inode == blkaddr) 524 + if (IS_INODE(page)) 546 525 recover_inode(entry->inode, page); 547 526 if (entry->last_dentry == blkaddr) { 548 - err = recover_dentry(entry->inode, page); 527 + err = recover_dentry(entry->inode, page, dir_list); 549 528 if (err) { 550 529 f2fs_put_page(page, 1); 551 530 break; ··· 557 536 break; 558 537 } 559 538 560 - if (entry->blkaddr == blkaddr) { 561 - iput(entry->inode); 562 - list_del(&entry->list); 563 - kmem_cache_free(fsync_entry_slab, entry); 564 - } 539 + if (entry->blkaddr == blkaddr) 540 + del_fsync_inode(entry); 565 541 next: 566 542 /* check next segment */ 567 543 blkaddr = next_blkaddr_of_node(page); ··· 569 551 return err; 570 552 } 571 553 572 - int recover_fsync_data(struct f2fs_sb_info *sbi) 554 + int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) 573 555 { 574 556 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 575 557 struct list_head inode_list; 558 + struct list_head dir_list; 576 559 block_t blkaddr; 577 560 int err; 561 + int ret = 0; 578 562 bool need_writecp = false; 579 563 580 564 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", ··· 585 565 return -ENOMEM; 586 566 587 567 INIT_LIST_HEAD(&inode_list); 568 + INIT_LIST_HEAD(&dir_list); 588 569 589 570 /* prevent checkpoint */ 590 571 mutex_lock(&sbi->cp_mutex); ··· 594 573 595 574 /* step #1: find fsynced inode numbers 
*/ 596 575 err = find_fsync_dnodes(sbi, &inode_list); 597 - if (err) 576 + if (err || list_empty(&inode_list)) 598 577 goto out; 599 578 600 - if (list_empty(&inode_list)) 579 + if (check_only) { 580 + ret = 1; 601 581 goto out; 582 + } 602 583 603 584 need_writecp = true; 604 585 605 586 /* step #2: recover data */ 606 - err = recover_data(sbi, &inode_list); 587 + err = recover_data(sbi, &inode_list, &dir_list); 607 588 if (!err) 608 589 f2fs_bug_on(sbi, !list_empty(&inode_list)); 609 590 out: 610 591 destroy_fsync_dnodes(&inode_list); 611 - kmem_cache_destroy(fsync_entry_slab); 612 592 613 593 /* truncate meta pages to be used by the recovery */ 614 594 truncate_inode_pages_range(META_MAPPING(sbi), ··· 647 625 } else { 648 626 mutex_unlock(&sbi->cp_mutex); 649 627 } 650 - return err; 628 + 629 + destroy_fsync_dnodes(&dir_list); 630 + kmem_cache_destroy(fsync_entry_slab); 631 + return ret ? ret: err; 651 632 }
+6 -2
fs/f2fs/segment.c
··· 223 223 f2fs_put_dnode(&dn); 224 224 } 225 225 next: 226 - ClearPageUptodate(page); 226 + /* we don't need to invalidate this in the successful status */ 227 + if (drop || recover) 228 + ClearPageUptodate(page); 227 229 set_page_private(page, 0); 228 - ClearPageUptodate(page); 230 + ClearPagePrivate(page); 229 231 f2fs_put_page(page, 1); 230 232 231 233 list_del(&cur->list); ··· 240 238 void drop_inmem_pages(struct inode *inode) 241 239 { 242 240 struct f2fs_inode_info *fi = F2FS_I(inode); 241 + 242 + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 243 243 244 244 mutex_lock(&fi->inmem_lock); 245 245 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+5 -4
fs/f2fs/segment.h
··· 158 158 }; 159 159 160 160 struct seg_entry { 161 - unsigned short valid_blocks; /* # of valid blocks */ 161 + unsigned int type:6; /* segment type like CURSEG_XXX_TYPE */ 162 + unsigned int valid_blocks:10; /* # of valid blocks */ 163 + unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */ 164 + unsigned int padding:6; /* padding */ 162 165 unsigned char *cur_valid_map; /* validity bitmap of blocks */ 163 166 /* 164 167 * # of valid blocks and the validity bitmap stored in the last 165 168 * checkpoint pack. This information is used by the SSR mode. 166 169 */ 167 - unsigned short ckpt_valid_blocks; 168 - unsigned char *ckpt_valid_map; 170 + unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */ 169 171 unsigned char *discard_map; 170 - unsigned char type; /* segment type like CURSEG_XXX_TYPE */ 171 172 unsigned long long mtime; /* modification time of the segment */ 172 173 }; 173 174
+246 -42
fs/f2fs/super.c
··· 39 39 static struct kmem_cache *f2fs_inode_cachep; 40 40 static struct kset *f2fs_kset; 41 41 42 + #ifdef CONFIG_F2FS_FAULT_INJECTION 43 + struct f2fs_fault_info f2fs_fault; 44 + 45 + char *fault_name[FAULT_MAX] = { 46 + [FAULT_KMALLOC] = "kmalloc", 47 + [FAULT_PAGE_ALLOC] = "page alloc", 48 + [FAULT_ALLOC_NID] = "alloc nid", 49 + [FAULT_ORPHAN] = "orphan", 50 + [FAULT_BLOCK] = "no more block", 51 + [FAULT_DIR_DEPTH] = "too big dir depth", 52 + }; 53 + 54 + static void f2fs_build_fault_attr(unsigned int rate) 55 + { 56 + if (rate) { 57 + atomic_set(&f2fs_fault.inject_ops, 0); 58 + f2fs_fault.inject_rate = rate; 59 + f2fs_fault.inject_type = (1 << FAULT_MAX) - 1; 60 + } else { 61 + memset(&f2fs_fault, 0, sizeof(struct f2fs_fault_info)); 62 + } 63 + } 64 + #endif 65 + 42 66 /* f2fs-wide shrinker description */ 43 67 static struct shrinker f2fs_shrinker_info = { 44 68 .scan_objects = f2fs_shrink_scan, ··· 92 68 Opt_noextent_cache, 93 69 Opt_noinline_data, 94 70 Opt_data_flush, 71 + Opt_fault_injection, 95 72 Opt_err, 96 73 }; 97 74 ··· 118 93 {Opt_noextent_cache, "noextent_cache"}, 119 94 {Opt_noinline_data, "noinline_data"}, 120 95 {Opt_data_flush, "data_flush"}, 96 + {Opt_fault_injection, "fault_injection=%u"}, 121 97 {Opt_err, NULL}, 122 98 }; 123 99 ··· 128 102 SM_INFO, /* struct f2fs_sm_info */ 129 103 NM_INFO, /* struct f2fs_nm_info */ 130 104 F2FS_SBI, /* struct f2fs_sb_info */ 105 + #ifdef CONFIG_F2FS_FAULT_INJECTION 106 + FAULT_INFO_RATE, /* struct f2fs_fault_info */ 107 + FAULT_INFO_TYPE, /* struct f2fs_fault_info */ 108 + #endif 131 109 }; 132 110 133 111 struct f2fs_attr { ··· 153 123 return (unsigned char *)NM_I(sbi); 154 124 else if (struct_type == F2FS_SBI) 155 125 return (unsigned char *)sbi; 126 + #ifdef CONFIG_F2FS_FAULT_INJECTION 127 + else if (struct_type == FAULT_INFO_RATE || 128 + struct_type == FAULT_INFO_TYPE) 129 + return (unsigned char *)&f2fs_fault; 130 + #endif 156 131 return NULL; 157 132 } 158 133 ··· 207 172 ret = 
kstrtoul(skip_spaces(buf), 0, &t); 208 173 if (ret < 0) 209 174 return ret; 175 + #ifdef CONFIG_F2FS_FAULT_INJECTION 176 + if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) 177 + return -EINVAL; 178 + #endif 210 179 *ui = t; 211 180 return count; 212 181 } ··· 276 237 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 277 238 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); 278 239 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); 240 + #ifdef CONFIG_F2FS_FAULT_INJECTION 241 + F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); 242 + F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); 243 + #endif 279 244 F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); 280 245 281 246 #define ATTR_LIST(name) (&f2fs_attr_##name.attr) ··· 316 273 .release = f2fs_sb_release, 317 274 }; 318 275 276 + #ifdef CONFIG_F2FS_FAULT_INJECTION 277 + /* sysfs for f2fs fault injection */ 278 + static struct kobject f2fs_fault_inject; 279 + 280 + static struct attribute *f2fs_fault_attrs[] = { 281 + ATTR_LIST(inject_rate), 282 + ATTR_LIST(inject_type), 283 + NULL 284 + }; 285 + 286 + static struct kobj_type f2fs_fault_ktype = { 287 + .default_attrs = f2fs_fault_attrs, 288 + .sysfs_ops = &f2fs_attr_ops, 289 + }; 290 + #endif 291 + 319 292 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 
320 293 { 321 294 struct va_format vaf; ··· 358 299 substring_t args[MAX_OPT_ARGS]; 359 300 char *p, *name; 360 301 int arg = 0; 302 + 303 + #ifdef CONFIG_F2FS_FAULT_INJECTION 304 + f2fs_build_fault_attr(0); 305 + #endif 361 306 362 307 if (!options) 363 308 return 0; ··· 496 433 case Opt_data_flush: 497 434 set_opt(sbi, DATA_FLUSH); 498 435 break; 436 + case Opt_fault_injection: 437 + if (args->from && match_int(args, &arg)) 438 + return -EINVAL; 439 + #ifdef CONFIG_F2FS_FAULT_INJECTION 440 + f2fs_build_fault_attr(arg); 441 + #else 442 + f2fs_msg(sb, KERN_INFO, 443 + "FAULT_INJECTION was not selected"); 444 + #endif 445 + break; 499 446 default: 500 447 f2fs_msg(sb, KERN_ERR, 501 448 "Unrecognized mount option \"%s\" or missing value", ··· 526 453 527 454 init_once((void *) fi); 528 455 456 + if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) { 457 + kmem_cache_free(f2fs_inode_cachep, fi); 458 + return NULL; 459 + } 460 + 529 461 /* Initialize f2fs-specific inode info */ 530 462 fi->vfs_inode.i_version = 1; 531 - atomic_set(&fi->dirty_pages, 0); 532 463 fi->i_current_depth = 1; 533 464 fi->i_advise = 0; 534 465 init_rwsem(&fi->i_sem); ··· 607 530 608 531 static void f2fs_destroy_inode(struct inode *inode) 609 532 { 533 + percpu_counter_destroy(&F2FS_I(inode)->dirty_pages); 610 534 call_rcu(&inode->i_rcu, f2fs_i_callback); 535 + } 536 + 537 + static void destroy_percpu_info(struct f2fs_sb_info *sbi) 538 + { 539 + int i; 540 + 541 + for (i = 0; i < NR_COUNT_TYPE; i++) 542 + percpu_counter_destroy(&sbi->nr_pages[i]); 543 + percpu_counter_destroy(&sbi->alloc_valid_block_count); 544 + percpu_counter_destroy(&sbi->total_valid_inode_count); 611 545 } 612 546 613 547 static void f2fs_put_super(struct super_block *sb) ··· 627 539 628 540 if (sbi->s_proc) { 629 541 remove_proc_entry("segment_info", sbi->s_proc); 542 + remove_proc_entry("segment_bits", sbi->s_proc); 630 543 remove_proc_entry(sb->s_id, f2fs_proc_root); 631 544 } 632 545 kobject_del(&sbi->s_kobj); ··· 657 
568 * normally superblock is clean, so we need to release this. 658 569 * In addition, EIO will skip do checkpoint, we need this as well. 659 570 */ 660 - release_ino_entry(sbi); 571 + release_ino_entry(sbi, true); 661 572 release_discard_addrs(sbi); 662 573 663 574 f2fs_leave_shrinker(sbi); 664 575 mutex_unlock(&sbi->umount_mutex); 665 576 666 577 /* our cp_error case, we can wait for any writeback page */ 667 - if (get_pages(sbi, F2FS_WRITEBACK)) 668 - f2fs_flush_merged_bios(sbi); 578 + f2fs_flush_merged_bios(sbi); 669 579 670 580 iput(sbi->node_inode); 671 581 iput(sbi->meta_inode); ··· 681 593 if (sbi->s_chksum_driver) 682 594 crypto_free_shash(sbi->s_chksum_driver); 683 595 kfree(sbi->raw_super); 596 + 597 + destroy_percpu_info(sbi); 684 598 kfree(sbi); 685 599 } 686 600 ··· 835 745 return 0; 836 746 } 837 747 838 - static int segment_info_open_fs(struct inode *inode, struct file *file) 748 + static int segment_bits_seq_show(struct seq_file *seq, void *offset) 839 749 { 840 - return single_open(file, segment_info_seq_show, PDE_DATA(inode)); 750 + struct super_block *sb = seq->private; 751 + struct f2fs_sb_info *sbi = F2FS_SB(sb); 752 + unsigned int total_segs = 753 + le32_to_cpu(sbi->raw_super->segment_count_main); 754 + int i, j; 755 + 756 + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" 757 + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); 758 + 759 + for (i = 0; i < total_segs; i++) { 760 + struct seg_entry *se = get_seg_entry(sbi, i); 761 + 762 + seq_printf(seq, "%-10d", i); 763 + seq_printf(seq, "%d|%-3u|", se->type, 764 + get_valid_blocks(sbi, i, 1)); 765 + for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) 766 + seq_printf(seq, "%x ", se->cur_valid_map[j]); 767 + seq_putc(seq, '\n'); 768 + } 769 + return 0; 841 770 } 842 771 843 - static const struct file_operations f2fs_seq_segment_info_fops = { 844 - .owner = THIS_MODULE, 845 - .open = segment_info_open_fs, 846 - .read = seq_read, 847 - .llseek = seq_lseek, 848 - .release = single_release, 
772 + #define F2FS_PROC_FILE_DEF(_name) \ 773 + static int _name##_open_fs(struct inode *inode, struct file *file) \ 774 + { \ 775 + return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ 776 + } \ 777 + \ 778 + static const struct file_operations f2fs_seq_##_name##_fops = { \ 779 + .owner = THIS_MODULE, \ 780 + .open = _name##_open_fs, \ 781 + .read = seq_read, \ 782 + .llseek = seq_lseek, \ 783 + .release = single_release, \ 849 784 }; 785 + 786 + F2FS_PROC_FILE_DEF(segment_info); 787 + F2FS_PROC_FILE_DEF(segment_bits); 850 788 851 789 static void default_options(struct f2fs_sb_info *sbi) 852 790 { ··· 909 791 org_mount_opt = sbi->mount_opt; 910 792 active_logs = sbi->active_logs; 911 793 912 - if (*flags & MS_RDONLY) { 913 - set_opt(sbi, FASTBOOT); 914 - set_sbi_flag(sbi, SBI_IS_DIRTY); 794 + /* recover superblocks we couldn't write due to previous RO mount */ 795 + if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { 796 + err = f2fs_commit_super(sbi, false); 797 + f2fs_msg(sb, KERN_INFO, 798 + "Try to recover all the superblocks, ret: %d", err); 799 + if (!err) 800 + clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); 915 801 } 916 - 917 - sync_filesystem(sb); 918 802 919 803 sbi->mount_opt.opt = 0; 920 804 default_options(sbi); ··· 949 829 if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { 950 830 if (sbi->gc_thread) { 951 831 stop_gc_thread(sbi); 952 - f2fs_sync_fs(sb, 1); 953 832 need_restart_gc = true; 954 833 } 955 834 } else if (!sbi->gc_thread) { ··· 956 837 if (err) 957 838 goto restore_opts; 958 839 need_stop_gc = true; 840 + } 841 + 842 + if (*flags & MS_RDONLY) { 843 + writeback_inodes_sb(sb, WB_REASON_SYNC); 844 + sync_inodes_sb(sb); 845 + 846 + set_sbi_flag(sbi, SBI_IS_DIRTY); 847 + set_sbi_flag(sbi, SBI_IS_CLOSE); 848 + f2fs_sync_fs(sb, 1); 849 + clear_sbi_flag(sbi, SBI_IS_CLOSE); 959 850 } 960 851 961 852 /* ··· 981 852 } 982 853 skip: 983 854 /* Update the POSIXACL Flag */ 984 - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 855 
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 985 856 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); 857 + 986 858 return 0; 987 859 restore_gc: 988 860 if (need_restart_gc) { ··· 1023 893 ctx, len, NULL); 1024 894 } 1025 895 896 + static int f2fs_key_prefix(struct inode *inode, u8 **key) 897 + { 898 + *key = F2FS_I_SB(inode)->key_prefix; 899 + return F2FS_I_SB(inode)->key_prefix_size; 900 + } 901 + 1026 902 static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, 1027 903 void *fs_data) 1028 904 { ··· 1045 909 1046 910 static struct fscrypt_operations f2fs_cryptops = { 1047 911 .get_context = f2fs_get_context, 912 + .key_prefix = f2fs_key_prefix, 1048 913 .set_context = f2fs_set_context, 1049 914 .is_encrypted = f2fs_encrypted_inode, 1050 915 .empty_dir = f2fs_empty_dir, ··· 1135 998 return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); 1136 999 } 1137 1000 1138 - static inline bool sanity_check_area_boundary(struct super_block *sb, 1001 + static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, 1139 1002 struct buffer_head *bh) 1140 1003 { 1141 1004 struct f2fs_super_block *raw_super = (struct f2fs_super_block *) 1142 1005 (bh->b_data + F2FS_SUPER_OFFSET); 1006 + struct super_block *sb = sbi->sb; 1143 1007 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 1144 1008 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); 1145 1009 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr); ··· 1219 1081 segment0_blkaddr) >> log_blocks_per_seg); 1220 1082 1221 1083 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) { 1084 + set_sbi_flag(sbi, SBI_NEED_SB_WRITE); 1222 1085 res = "internally"; 1223 1086 } else { 1224 1087 err = __f2fs_commit_super(bh, NULL); ··· 1237 1098 return false; 1238 1099 } 1239 1100 1240 - static int sanity_check_raw_super(struct super_block *sb, 1101 + static int sanity_check_raw_super(struct f2fs_sb_info *sbi, 1241 1102 struct buffer_head *bh) 1242 1103 { 1243 1104 struct f2fs_super_block *raw_super = 
(struct f2fs_super_block *) 1244 1105 (bh->b_data + F2FS_SUPER_OFFSET); 1106 + struct super_block *sb = sbi->sb; 1245 1107 unsigned int blocksize; 1246 1108 1247 1109 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { ··· 1309 1169 } 1310 1170 1311 1171 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ 1312 - if (sanity_check_area_boundary(sb, bh)) 1172 + if (sanity_check_area_boundary(sbi, bh)) 1313 1173 return 1; 1314 1174 1315 1175 return 0; ··· 1341 1201 static void init_sb_info(struct f2fs_sb_info *sbi) 1342 1202 { 1343 1203 struct f2fs_super_block *raw_super = sbi->raw_super; 1344 - int i; 1345 1204 1346 1205 sbi->log_sectors_per_block = 1347 1206 le32_to_cpu(raw_super->log_sectors_per_block); ··· 1360 1221 sbi->cur_victim_sec = NULL_SECNO; 1361 1222 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; 1362 1223 1363 - for (i = 0; i < NR_COUNT_TYPE; i++) 1364 - atomic_set(&sbi->nr_pages[i], 0); 1365 - 1366 1224 sbi->dir_level = DEF_DIR_LEVEL; 1367 1225 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; 1368 1226 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; ··· 1367 1231 1368 1232 INIT_LIST_HEAD(&sbi->s_list); 1369 1233 mutex_init(&sbi->umount_mutex); 1234 + 1235 + #ifdef CONFIG_F2FS_FS_ENCRYPTION 1236 + memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, 1237 + F2FS_KEY_DESC_PREFIX_SIZE); 1238 + sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE; 1239 + #endif 1240 + } 1241 + 1242 + static int init_percpu_info(struct f2fs_sb_info *sbi) 1243 + { 1244 + int i, err; 1245 + 1246 + for (i = 0; i < NR_COUNT_TYPE; i++) { 1247 + err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); 1248 + if (err) 1249 + return err; 1250 + } 1251 + 1252 + err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL); 1253 + if (err) 1254 + return err; 1255 + 1256 + return percpu_counter_init(&sbi->total_valid_inode_count, 0, 1257 + GFP_KERNEL); 1370 1258 } 1371 1259 1372 1260 /* ··· 1399 1239 * to get the first valid one. 
If any one of them is broken, we pass 1400 1240 * them recovery flag back to the caller. 1401 1241 */ 1402 - static int read_raw_super_block(struct super_block *sb, 1242 + static int read_raw_super_block(struct f2fs_sb_info *sbi, 1403 1243 struct f2fs_super_block **raw_super, 1404 1244 int *valid_super_block, int *recovery) 1405 1245 { 1246 + struct super_block *sb = sbi->sb; 1406 1247 int block; 1407 1248 struct buffer_head *bh; 1408 1249 struct f2fs_super_block *super; ··· 1423 1262 } 1424 1263 1425 1264 /* sanity checking of raw super */ 1426 - if (sanity_check_raw_super(sb, bh)) { 1265 + if (sanity_check_raw_super(sbi, bh)) { 1427 1266 f2fs_msg(sb, KERN_ERR, 1428 1267 "Can't find valid F2FS filesystem in %dth superblock", 1429 1268 block + 1); ··· 1459 1298 struct buffer_head *bh; 1460 1299 int err; 1461 1300 1301 + if ((recover && f2fs_readonly(sbi->sb)) || 1302 + bdev_read_only(sbi->sb->s_bdev)) { 1303 + set_sbi_flag(sbi, SBI_NEED_SB_WRITE); 1304 + return -EROFS; 1305 + } 1306 + 1462 1307 /* write back-up superblock first */ 1463 1308 bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 
0: 1); 1464 1309 if (!bh) ··· 1490 1323 struct f2fs_sb_info *sbi; 1491 1324 struct f2fs_super_block *raw_super; 1492 1325 struct inode *root; 1493 - long err; 1326 + int err; 1494 1327 bool retry = true, need_fsck = false; 1495 1328 char *options = NULL; 1496 1329 int recovery, i, valid_super_block; ··· 1507 1340 if (!sbi) 1508 1341 return -ENOMEM; 1509 1342 1343 + sbi->sb = sb; 1344 + 1510 1345 /* Load the checksum driver */ 1511 1346 sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); 1512 1347 if (IS_ERR(sbi->s_chksum_driver)) { ··· 1524 1355 goto free_sbi; 1525 1356 } 1526 1357 1527 - err = read_raw_super_block(sb, &raw_super, &valid_super_block, 1358 + err = read_raw_super_block(sbi, &raw_super, &valid_super_block, 1528 1359 &recovery); 1529 1360 if (err) 1530 1361 goto free_sbi; ··· 1559 1390 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); 1560 1391 1561 1392 /* init f2fs-specific super block info */ 1562 - sbi->sb = sb; 1563 1393 sbi->raw_super = raw_super; 1564 1394 sbi->valid_super_block = valid_super_block; 1565 1395 mutex_init(&sbi->gc_mutex); ··· 1583 1415 init_waitqueue_head(&sbi->cp_wait); 1584 1416 init_sb_info(sbi); 1585 1417 1418 + err = init_percpu_info(sbi); 1419 + if (err) 1420 + goto free_options; 1421 + 1586 1422 /* get an inode for meta space */ 1587 1423 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 1588 1424 if (IS_ERR(sbi->meta_inode)) { ··· 1603 1431 1604 1432 sbi->total_valid_node_count = 1605 1433 le32_to_cpu(sbi->ckpt->valid_node_count); 1606 - sbi->total_valid_inode_count = 1607 - le32_to_cpu(sbi->ckpt->valid_inode_count); 1434 + percpu_counter_set(&sbi->total_valid_inode_count, 1435 + le32_to_cpu(sbi->ckpt->valid_inode_count)); 1608 1436 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); 1609 1437 sbi->total_valid_block_count = 1610 1438 le64_to_cpu(sbi->ckpt->valid_block_count); 1611 1439 sbi->last_valid_block_count = sbi->total_valid_block_count; 1612 - sbi->alloc_valid_block_count = 0; 
1440 + 1613 1441 for (i = 0; i < NR_INODE_TYPE; i++) { 1614 1442 INIT_LIST_HEAD(&sbi->inode_list[i]); 1615 1443 spin_lock_init(&sbi->inode_lock[i]); ··· 1687 1515 if (f2fs_proc_root) 1688 1516 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1689 1517 1690 - if (sbi->s_proc) 1518 + if (sbi->s_proc) { 1691 1519 proc_create_data("segment_info", S_IRUGO, sbi->s_proc, 1692 1520 &f2fs_seq_segment_info_fops, sb); 1521 + proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, 1522 + &f2fs_seq_segment_bits_fops, sb); 1523 + } 1693 1524 1694 1525 sbi->s_kobj.kset = f2fs_kset; 1695 1526 init_completion(&sbi->s_kobj_unregister); ··· 1716 1541 if (need_fsck) 1717 1542 set_sbi_flag(sbi, SBI_NEED_FSCK); 1718 1543 1719 - err = recover_fsync_data(sbi); 1720 - if (err) { 1544 + err = recover_fsync_data(sbi, false); 1545 + if (err < 0) { 1721 1546 need_fsck = true; 1722 1547 f2fs_msg(sb, KERN_ERR, 1723 - "Cannot recover all fsync data errno=%ld", err); 1548 + "Cannot recover all fsync data errno=%d", err); 1549 + goto free_kobj; 1550 + } 1551 + } else { 1552 + err = recover_fsync_data(sbi, true); 1553 + 1554 + if (!f2fs_readonly(sb) && err > 0) { 1555 + err = -EINVAL; 1556 + f2fs_msg(sb, KERN_ERR, 1557 + "Need to recover fsync data"); 1724 1558 goto free_kobj; 1725 1559 } 1726 1560 } 1561 + 1727 1562 /* recover_fsync_data() cleared this already */ 1728 1563 clear_sbi_flag(sbi, SBI_POR_DOING); 1729 1564 ··· 1750 1565 kfree(options); 1751 1566 1752 1567 /* recover broken superblock */ 1753 - if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { 1568 + if (recovery) { 1754 1569 err = f2fs_commit_super(sbi, true); 1755 1570 f2fs_msg(sb, KERN_INFO, 1756 - "Try to recover %dth superblock, ret: %ld", 1571 + "Try to recover %dth superblock, ret: %d", 1757 1572 sbi->valid_super_block ? 
1 : 2, err); 1758 1573 } 1759 1574 ··· 1768 1583 free_proc: 1769 1584 if (sbi->s_proc) { 1770 1585 remove_proc_entry("segment_info", sbi->s_proc); 1586 + remove_proc_entry("segment_bits", sbi->s_proc); 1771 1587 remove_proc_entry(sb->s_id, f2fs_proc_root); 1772 1588 } 1773 1589 f2fs_destroy_stats(sbi); ··· 1789 1603 make_bad_inode(sbi->meta_inode); 1790 1604 iput(sbi->meta_inode); 1791 1605 free_options: 1606 + destroy_percpu_info(sbi); 1792 1607 kfree(options); 1793 1608 free_sb_buf: 1794 1609 kfree(raw_super); ··· 1875 1688 err = -ENOMEM; 1876 1689 goto free_extent_cache; 1877 1690 } 1691 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1692 + f2fs_fault_inject.kset = f2fs_kset; 1693 + f2fs_build_fault_attr(0); 1694 + err = kobject_init_and_add(&f2fs_fault_inject, &f2fs_fault_ktype, 1695 + NULL, "fault_injection"); 1696 + if (err) { 1697 + f2fs_fault_inject.kset = NULL; 1698 + goto free_kset; 1699 + } 1700 + #endif 1878 1701 err = register_shrinker(&f2fs_shrinker_info); 1879 1702 if (err) 1880 1703 goto free_kset; ··· 1903 1706 free_shrinker: 1904 1707 unregister_shrinker(&f2fs_shrinker_info); 1905 1708 free_kset: 1709 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1710 + if (f2fs_fault_inject.kset) 1711 + kobject_put(&f2fs_fault_inject); 1712 + #endif 1906 1713 kset_unregister(f2fs_kset); 1907 1714 free_extent_cache: 1908 1715 destroy_extent_cache(); ··· 1926 1725 { 1927 1726 remove_proc_entry("fs/f2fs", NULL); 1928 1727 f2fs_destroy_root_stats(); 1929 - unregister_shrinker(&f2fs_shrinker_info); 1930 1728 unregister_filesystem(&f2fs_fs_type); 1729 + unregister_shrinker(&f2fs_shrinker_info); 1730 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1731 + kobject_put(&f2fs_fault_inject); 1732 + #endif 1733 + kset_unregister(f2fs_kset); 1931 1734 destroy_extent_cache(); 1932 1735 destroy_checkpoint_caches(); 1933 1736 destroy_segment_manager_caches(); 1934 1737 destroy_node_manager_caches(); 1935 1738 destroy_inodecache(); 1936 - kset_unregister(f2fs_kset); 1937 1739 f2fs_destroy_trace_ios(); 1938 
1740 } 1939 1741
+1 -2
fs/f2fs/xattr.c
··· 498 498 free = free + ENTRY_SIZE(here); 499 499 500 500 if (unlikely(free < newsize)) { 501 - error = -ENOSPC; 501 + error = -E2BIG; 502 502 goto exit; 503 503 } 504 504 } ··· 526 526 * Before we come here, old entry is removed. 527 527 * We just write new entry. 528 528 */ 529 - memset(last, 0, newsize); 530 529 last->e_name_index = index; 531 530 last->e_name_len = len; 532 531 memcpy(last->e_name, name, len);
+2
include/linux/f2fs_fs.h
··· 508 508 F2FS_FT_MAX 509 509 }; 510 510 511 + #define S_SHIFT 12 512 + 511 513 #endif /* _LINUX_F2FS_FS_H */
+1
include/linux/fscrypto.h
··· 175 175 */ 176 176 struct fscrypt_operations { 177 177 int (*get_context)(struct inode *, void *, size_t); 178 + int (*key_prefix)(struct inode *, u8 **); 178 179 int (*prepare_context)(struct inode *); 179 180 int (*set_context)(struct inode *, const void *, size_t, void *); 180 181 int (*dummy_context)(struct inode *);
+14 -10
include/trace/events/f2fs.h
··· 694 694 __entry->ret) 695 695 ); 696 696 697 - TRACE_EVENT(f2fs_reserve_new_block, 697 + TRACE_EVENT(f2fs_reserve_new_blocks, 698 698 699 - TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), 699 + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node, 700 + blkcnt_t count), 700 701 701 - TP_ARGS(inode, nid, ofs_in_node), 702 + TP_ARGS(inode, nid, ofs_in_node, count), 702 703 703 704 TP_STRUCT__entry( 704 705 __field(dev_t, dev) 705 706 __field(nid_t, nid) 706 707 __field(unsigned int, ofs_in_node) 708 + __field(blkcnt_t, count) 707 709 ), 708 710 709 711 TP_fast_assign( 710 712 __entry->dev = inode->i_sb->s_dev; 711 713 __entry->nid = nid; 712 714 __entry->ofs_in_node = ofs_in_node; 715 + __entry->count = count; 713 716 ), 714 717 715 - TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", 718 + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu", 716 719 show_dev(__entry), 717 720 (unsigned int)__entry->nid, 718 - __entry->ofs_in_node) 721 + __entry->ofs_in_node, 722 + (unsigned long long)__entry->count) 719 723 ); 720 724 721 725 DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ··· 1275 1271 1276 1272 DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, 1277 1273 1278 - TP_PROTO(struct super_block *sb, int type, int count), 1274 + TP_PROTO(struct super_block *sb, int type, s64 count), 1279 1275 1280 1276 TP_ARGS(sb, type, count), 1281 1277 1282 1278 TP_STRUCT__entry( 1283 1279 __field(dev_t, dev) 1284 1280 __field(int, type) 1285 - __field(int, count) 1281 + __field(s64, count) 1286 1282 ), 1287 1283 1288 1284 TP_fast_assign( ··· 1291 1287 __entry->count = count; 1292 1288 ), 1293 1289 1294 - TP_printk("dev = (%d,%d), %s, dirty count = %d", 1290 + TP_printk("dev = (%d,%d), %s, dirty count = %lld", 1295 1291 show_dev(__entry), 1296 1292 show_file_type(__entry->type), 1297 1293 __entry->count) ··· 1299 1295 1300 1296 DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter, 1301 1297 1302 - TP_PROTO(struct super_block 
*sb, int type, int count), 1298 + TP_PROTO(struct super_block *sb, int type, s64 count), 1303 1299 1304 1300 TP_ARGS(sb, type, count) 1305 1301 ); 1306 1302 1307 1303 DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, 1308 1304 1309 - TP_PROTO(struct super_block *sb, int type, int count), 1305 + TP_PROTO(struct super_block *sb, int type, s64 count), 1310 1306 1311 1307 TP_ARGS(sb, type, count) 1312 1308 );