Merge tag 'for-f2fs-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

+1 -1

MAINTAINERS

··· 4793 4793 F2FS FILE SYSTEM 4794 4794 M: Jaegeuk Kim <jaegeuk@kernel.org> 4795 4795 M: Changman Lee <cm224.lee@samsung.com> 4796 - R: Chao Yu <chao2.yu@samsung.com> 4796 + R: Chao Yu <yuchao0@huawei.com> 4797 4797 L: linux-f2fs-devel@lists.sourceforge.net 4798 4798 W: http://en.wikipedia.org/wiki/F2FS 4799 4799 T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git

+76 -44

fs/crypto/keyinfo.c

··· 78 78 return res; 79 79 } 80 80 81 + static int validate_user_key(struct fscrypt_info *crypt_info, 82 + struct fscrypt_context *ctx, u8 *raw_key, 83 + u8 *prefix, int prefix_size) 84 + { 85 + u8 *full_key_descriptor; 86 + struct key *keyring_key; 87 + struct fscrypt_key *master_key; 88 + const struct user_key_payload *ukp; 89 + int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1; 90 + int res; 91 + 92 + full_key_descriptor = kmalloc(full_key_len, GFP_NOFS); 93 + if (!full_key_descriptor) 94 + return -ENOMEM; 95 + 96 + memcpy(full_key_descriptor, prefix, prefix_size); 97 + sprintf(full_key_descriptor + prefix_size, 98 + "%*phN", FS_KEY_DESCRIPTOR_SIZE, 99 + ctx->master_key_descriptor); 100 + full_key_descriptor[full_key_len - 1] = '\0'; 101 + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); 102 + kfree(full_key_descriptor); 103 + if (IS_ERR(keyring_key)) 104 + return PTR_ERR(keyring_key); 105 + 106 + if (keyring_key->type != &key_type_logon) { 107 + printk_once(KERN_WARNING 108 + "%s: key type must be logon\n", __func__); 109 + res = -ENOKEY; 110 + goto out; 111 + } 112 + down_read(&keyring_key->sem); 113 + ukp = user_key_payload(keyring_key); 114 + if (ukp->datalen != sizeof(struct fscrypt_key)) { 115 + res = -EINVAL; 116 + up_read(&keyring_key->sem); 117 + goto out; 118 + } 119 + master_key = (struct fscrypt_key *)ukp->data; 120 + BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); 121 + 122 + if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { 123 + printk_once(KERN_WARNING 124 + "%s: key size incorrect: %d\n", 125 + __func__, master_key->size); 126 + res = -ENOKEY; 127 + up_read(&keyring_key->sem); 128 + goto out; 129 + } 130 + res = derive_key_aes(ctx->nonce, master_key->raw, raw_key); 131 + up_read(&keyring_key->sem); 132 + if (res) 133 + goto out; 134 + 135 + crypt_info->ci_keyring_key = keyring_key; 136 + return 0; 137 + out: 138 + key_put(keyring_key); 139 + return res; 140 + } 141 + 81 142 static 
void put_crypt_info(struct fscrypt_info *ci) 82 143 { 83 144 if (!ci) ··· 152 91 int get_crypt_info(struct inode *inode) 153 92 { 154 93 struct fscrypt_info *crypt_info; 155 - u8 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + 156 - (FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; 157 - struct key *keyring_key = NULL; 158 - struct fscrypt_key *master_key; 159 94 struct fscrypt_context ctx; 160 - const struct user_key_payload *ukp; 161 95 struct crypto_skcipher *ctfm; 162 96 const char *cipher_str; 163 97 u8 raw_key[FS_MAX_KEY_SIZE]; ··· 223 167 memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); 224 168 goto got_key; 225 169 } 226 - memcpy(full_key_descriptor, FS_KEY_DESC_PREFIX, 227 - FS_KEY_DESC_PREFIX_SIZE); 228 - sprintf(full_key_descriptor + FS_KEY_DESC_PREFIX_SIZE, 229 - "%*phN", FS_KEY_DESCRIPTOR_SIZE, 230 - ctx.master_key_descriptor); 231 - full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + 232 - (2 * FS_KEY_DESCRIPTOR_SIZE)] = '\0'; 233 - keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); 234 - if (IS_ERR(keyring_key)) { 235 - res = PTR_ERR(keyring_key); 236 - keyring_key = NULL; 237 - goto out; 238 - } 239 - crypt_info->ci_keyring_key = keyring_key; 240 - if (keyring_key->type != &key_type_logon) { 241 - printk_once(KERN_WARNING 242 - "%s: key type must be logon\n", __func__); 243 - res = -ENOKEY; 244 - goto out; 245 - } 246 - down_read(&keyring_key->sem); 247 - ukp = user_key_payload(keyring_key); 248 - if (ukp->datalen != sizeof(struct fscrypt_key)) { 249 - res = -EINVAL; 250 - up_read(&keyring_key->sem); 251 - goto out; 252 - } 253 - master_key = (struct fscrypt_key *)ukp->data; 254 - BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); 255 170 256 - if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { 257 - printk_once(KERN_WARNING 258 - "%s: key size incorrect: %d\n", 259 - __func__, master_key->size); 260 - res = -ENOKEY; 261 - up_read(&keyring_key->sem); 171 + res = validate_user_key(crypt_info, &ctx, raw_key, 172 + FS_KEY_DESC_PREFIX, 
FS_KEY_DESC_PREFIX_SIZE); 173 + if (res && inode->i_sb->s_cop->key_prefix) { 174 + u8 *prefix = NULL; 175 + int prefix_size, res2; 176 + 177 + prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix); 178 + res2 = validate_user_key(crypt_info, &ctx, raw_key, 179 + prefix, prefix_size); 180 + if (res2) { 181 + if (res2 == -ENOKEY) 182 + res = -ENOKEY; 183 + goto out; 184 + } 185 + } else if (res) { 262 186 goto out; 263 187 } 264 - res = derive_key_aes(ctx.nonce, master_key->raw, raw_key); 265 - up_read(&keyring_key->sem); 266 - if (res) 267 - goto out; 268 188 got_key: 269 189 ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); 270 190 if (!ctfm || IS_ERR(ctfm)) {

+8

fs/f2fs/Kconfig

··· 94 94 information and block IO patterns in the filesystem level. 95 95 96 96 If unsure, say N. 97 + 98 + config F2FS_FAULT_INJECTION 99 + bool "F2FS fault injection facility" 100 + depends on F2FS_FS 101 + help 102 + Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on. 103 + 104 + If unsure, say N.

+2 -2

fs/f2fs/acl.c

··· 115 115 struct f2fs_acl_entry *entry; 116 116 int i; 117 117 118 - f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * 118 + f2fs_acl = f2fs_kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * 119 119 sizeof(struct f2fs_acl_entry), GFP_NOFS); 120 120 if (!f2fs_acl) 121 121 return ERR_PTR(-ENOMEM); ··· 175 175 176 176 retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); 177 177 if (retval > 0) { 178 - value = kmalloc(retval, GFP_F2FS_ZERO); 178 + value = f2fs_kmalloc(retval, GFP_F2FS_ZERO); 179 179 if (!value) 180 180 return ERR_PTR(-ENOMEM); 181 181 retval = f2fs_getxattr(inode, name_index, "", value,

+36 -31

fs/f2fs/checkpoint.c

··· 26 26 static struct kmem_cache *ino_entry_slab; 27 27 struct kmem_cache *inode_entry_slab; 28 28 29 + void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) 30 + { 31 + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 32 + sbi->sb->s_flags |= MS_RDONLY; 33 + if (!end_io) 34 + f2fs_flush_merged_bios(sbi); 35 + } 36 + 29 37 /* 30 38 * We guarantee no failure on the returned page. 31 39 */ ··· 42 34 struct address_space *mapping = META_MAPPING(sbi); 43 35 struct page *page = NULL; 44 36 repeat: 45 - page = grab_cache_page(mapping, index); 37 + page = f2fs_grab_cache_page(mapping, index, false); 46 38 if (!page) { 47 39 cond_resched(); 48 40 goto repeat; ··· 72 64 if (unlikely(!is_meta)) 73 65 fio.rw &= ~REQ_META; 74 66 repeat: 75 - page = grab_cache_page(mapping, index); 67 + page = f2fs_grab_cache_page(mapping, index, false); 76 68 if (!page) { 77 69 cond_resched(); 78 70 goto repeat; ··· 99 91 * meta page. 100 92 */ 101 93 if (unlikely(!PageUptodate(page))) 102 - f2fs_stop_checkpoint(sbi); 94 + f2fs_stop_checkpoint(sbi, false); 103 95 out: 104 96 return page; 105 97 } ··· 194 186 BUG(); 195 187 } 196 188 197 - page = grab_cache_page(META_MAPPING(sbi), fio.new_blkaddr); 189 + page = f2fs_grab_cache_page(META_MAPPING(sbi), 190 + fio.new_blkaddr, false); 198 191 if (!page) 199 192 continue; 200 193 if (PageUptodate(page)) { ··· 220 211 bool readahead = false; 221 212 222 213 page = find_get_page(META_MAPPING(sbi), index); 223 - if (!page || (page && !PageUptodate(page))) 214 + if (!page || !PageUptodate(page)) 224 215 readahead = true; 225 216 f2fs_put_page(page, 0); 226 217 ··· 457 448 return e ? true : false; 458 449 } 459 450 460 - void release_ino_entry(struct f2fs_sb_info *sbi) 451 + void release_ino_entry(struct f2fs_sb_info *sbi, bool all) 461 452 { 462 453 struct ino_entry *e, *tmp; 463 454 int i; 464 455 465 - for (i = APPEND_INO; i <= UPDATE_INO; i++) { 456 + for (i = all ? 
ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { 466 457 struct inode_management *im = &sbi->im[i]; 467 458 468 459 spin_lock(&im->ino_lock); ··· 482 473 int err = 0; 483 474 484 475 spin_lock(&im->ino_lock); 476 + 477 + #ifdef CONFIG_F2FS_FAULT_INJECTION 478 + if (time_to_inject(FAULT_ORPHAN)) { 479 + spin_unlock(&im->ino_lock); 480 + return -ENOSPC; 481 + } 482 + #endif 485 483 if (unlikely(im->ino_num >= sbi->max_orphans)) 486 484 err = -ENOSPC; 487 485 else ··· 793 777 !S_ISLNK(inode->i_mode)) 794 778 return; 795 779 796 - spin_lock(&sbi->inode_lock[type]); 797 - __add_dirty_inode(inode, type); 798 - inode_inc_dirty_pages(inode); 799 - spin_unlock(&sbi->inode_lock[type]); 780 + if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) { 781 + spin_lock(&sbi->inode_lock[type]); 782 + __add_dirty_inode(inode, type); 783 + spin_unlock(&sbi->inode_lock[type]); 784 + } 800 785 786 + inode_inc_dirty_pages(inode); 801 787 SetPagePrivate(page); 802 788 f2fs_trace_pid(page); 803 - } 804 - 805 - void add_dirty_dir_inode(struct inode *inode) 806 - { 807 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 808 - 809 - spin_lock(&sbi->inode_lock[DIR_INODE]); 810 - __add_dirty_inode(inode, DIR_INODE); 811 - spin_unlock(&sbi->inode_lock[DIR_INODE]); 812 789 } 813 790 814 791 void remove_dirty_inode(struct inode *inode) 815 792 { 816 793 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 817 - struct f2fs_inode_info *fi = F2FS_I(inode); 818 794 enum inode_type type = S_ISDIR(inode->i_mode) ? 
DIR_INODE : FILE_INODE; 819 795 820 796 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 821 797 !S_ISLNK(inode->i_mode)) 822 798 return; 823 799 800 + if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH)) 801 + return; 802 + 824 803 spin_lock(&sbi->inode_lock[type]); 825 804 __remove_dirty_inode(inode, type); 826 805 spin_unlock(&sbi->inode_lock[type]); 827 - 828 - /* Only from the recovery routine */ 829 - if (is_inode_flag_set(fi, FI_DELAY_IPUT)) { 830 - clear_inode_flag(fi, FI_DELAY_IPUT); 831 - iput(inode); 832 - } 833 806 } 834 807 835 808 int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) ··· 897 892 898 893 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 899 894 up_write(&sbi->node_write); 900 - err = sync_node_pages(sbi, 0, &wbc); 895 + err = sync_node_pages(sbi, &wbc); 901 896 if (err) { 902 897 f2fs_unlock_all(sbi); 903 898 goto out; ··· 922 917 for (;;) { 923 918 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); 924 919 925 - if (!get_pages(sbi, F2FS_WRITEBACK)) 920 + if (!atomic_read(&sbi->nr_wb_bios)) 926 921 break; 927 922 928 923 io_schedule_timeout(5*HZ); ··· 1087 1082 1088 1083 /* update user_block_counts */ 1089 1084 sbi->last_valid_block_count = sbi->total_valid_block_count; 1090 - sbi->alloc_valid_block_count = 0; 1085 + percpu_counter_set(&sbi->alloc_valid_block_count, 0); 1091 1086 1092 1087 /* Here, we only have one bio having CP pack */ 1093 1088 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); ··· 1103 1098 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, 1104 1099 discard_blk); 1105 1100 1106 - release_ino_entry(sbi); 1101 + release_ino_entry(sbi, false); 1107 1102 1108 1103 if (unlikely(f2fs_cp_error(sbi))) 1109 1104 return -EIO;

+137 -60

fs/f2fs/data.c

··· 68 68 69 69 if (unlikely(bio->bi_error)) { 70 70 set_bit(AS_EIO, &page->mapping->flags); 71 - f2fs_stop_checkpoint(sbi); 71 + f2fs_stop_checkpoint(sbi, true); 72 72 } 73 73 end_page_writeback(page); 74 - dec_page_count(sbi, F2FS_WRITEBACK); 75 74 } 76 - 77 - if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait)) 75 + if (atomic_dec_and_test(&sbi->nr_wb_bios) && 76 + wq_has_sleeper(&sbi->cp_wait)) 78 77 wake_up(&sbi->cp_wait); 79 78 80 79 bio_put(bio); ··· 97 98 return bio; 98 99 } 99 100 101 + static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw, 102 + struct bio *bio) 103 + { 104 + if (!is_read_io(rw)) 105 + atomic_inc(&sbi->nr_wb_bios); 106 + submit_bio(rw, bio); 107 + } 108 + 100 109 static void __submit_merged_bio(struct f2fs_bio_info *io) 101 110 { 102 111 struct f2fs_io_info *fio = &io->fio; ··· 117 110 else 118 111 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); 119 112 120 - submit_bio(fio->rw, io->bio); 113 + __submit_bio(io->sbi, fio->rw, io->bio); 121 114 io->bio = NULL; 122 115 } 123 116 ··· 235 228 return -EFAULT; 236 229 } 237 230 238 - submit_bio(fio->rw, bio); 231 + __submit_bio(fio->sbi, fio->rw, bio); 239 232 return 0; 240 233 } 241 234 ··· 254 247 verify_block_addr(sbi, fio->new_blkaddr); 255 248 256 249 down_write(&io->io_rwsem); 257 - 258 - if (!is_read) 259 - inc_page_count(sbi, F2FS_WRITEBACK); 260 250 261 251 if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || 262 252 io->fio.rw != fio->rw)) ··· 282 278 trace_f2fs_submit_page_mbio(fio->page, fio); 283 279 } 284 280 281 + static void __set_data_blkaddr(struct dnode_of_data *dn) 282 + { 283 + struct f2fs_node *rn = F2FS_NODE(dn->node_page); 284 + __le32 *addr_array; 285 + 286 + /* Get physical address of data block */ 287 + addr_array = blkaddr_in_node(rn); 288 + addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 289 + } 290 + 285 291 /* 286 292 * Lock ordering for the change of data block address: 287 293 * ->data_page ··· 300 286 
*/ 301 287 void set_data_blkaddr(struct dnode_of_data *dn) 302 288 { 303 - struct f2fs_node *rn; 304 - __le32 *addr_array; 305 - struct page *node_page = dn->node_page; 306 - unsigned int ofs_in_node = dn->ofs_in_node; 307 - 308 - f2fs_wait_on_page_writeback(node_page, NODE, true); 309 - 310 - rn = F2FS_NODE(node_page); 311 - 312 - /* Get physical address of data block */ 313 - addr_array = blkaddr_in_node(rn); 314 - addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 315 - if (set_page_dirty(node_page)) 289 + f2fs_wait_on_page_writeback(dn->node_page, NODE, true); 290 + __set_data_blkaddr(dn); 291 + if (set_page_dirty(dn->node_page)) 316 292 dn->node_changed = true; 317 293 } 318 294 ··· 313 309 f2fs_update_extent_cache(dn); 314 310 } 315 311 316 - int reserve_new_block(struct dnode_of_data *dn) 312 + /* dn->ofs_in_node will be returned with up-to-date last block pointer */ 313 + int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) 317 314 { 318 315 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 319 316 317 + if (!count) 318 + return 0; 319 + 320 320 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 321 321 return -EPERM; 322 - if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 322 + if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) 323 323 return -ENOSPC; 324 324 325 - trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 325 + trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, 326 + dn->ofs_in_node, count); 326 327 327 - dn->data_blkaddr = NEW_ADDR; 328 - set_data_blkaddr(dn); 328 + f2fs_wait_on_page_writeback(dn->node_page, NODE, true); 329 + 330 + for (; count > 0; dn->ofs_in_node++) { 331 + block_t blkaddr = 332 + datablock_addr(dn->node_page, dn->ofs_in_node); 333 + if (blkaddr == NULL_ADDR) { 334 + dn->data_blkaddr = NEW_ADDR; 335 + __set_data_blkaddr(dn); 336 + count--; 337 + } 338 + } 339 + 340 + if (set_page_dirty(dn->node_page)) 341 + dn->node_changed = true; 342 + 329 343 
mark_inode_dirty(dn->inode); 330 344 sync_inode_page(dn); 331 345 return 0; 346 + } 347 + 348 + /* Should keep dn->ofs_in_node unchanged */ 349 + int reserve_new_block(struct dnode_of_data *dn) 350 + { 351 + unsigned int ofs_in_node = dn->ofs_in_node; 352 + int ret; 353 + 354 + ret = reserve_new_blocks(dn, 1); 355 + dn->ofs_in_node = ofs_in_node; 356 + return ret; 332 357 } 333 358 334 359 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) ··· 578 545 struct node_info ni; 579 546 int seg = CURSEG_WARM_DATA; 580 547 pgoff_t fofs; 548 + blkcnt_t count = 1; 581 549 582 550 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 583 551 return -EPERM; ··· 587 553 if (dn->data_blkaddr == NEW_ADDR) 588 554 goto alloc; 589 555 590 - if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 556 + if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) 591 557 return -ENOSPC; 592 558 593 559 alloc: ··· 616 582 struct f2fs_map_blocks map; 617 583 ssize_t ret = 0; 618 584 619 - map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos); 620 - map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from)); 585 + map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); 586 + map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from)); 621 587 map.m_next_pgofs = NULL; 622 588 623 589 if (f2fs_encrypted_inode(inode)) ··· 655 621 struct dnode_of_data dn; 656 622 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 657 623 int mode = create ? 
ALLOC_NODE : LOOKUP_NODE_RA; 658 - pgoff_t pgofs, end_offset; 624 + pgoff_t pgofs, end_offset, end; 659 625 int err = 0, ofs = 1; 626 + unsigned int ofs_in_node, last_ofs_in_node; 627 + blkcnt_t prealloc; 660 628 struct extent_info ei; 661 629 bool allocated = false; 662 630 block_t blkaddr; ··· 668 632 669 633 /* it only supports block size == page size */ 670 634 pgofs = (pgoff_t)map->m_lblk; 635 + end = pgofs + maxblocks; 671 636 672 637 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { 673 638 map->m_pblk = ei.blk + pgofs - ei.fofs; ··· 685 648 set_new_dnode(&dn, inode, NULL, NULL, 0); 686 649 err = get_dnode_of_data(&dn, pgofs, mode); 687 650 if (err) { 651 + if (flag == F2FS_GET_BLOCK_BMAP) 652 + map->m_pblk = 0; 688 653 if (err == -ENOENT) { 689 654 err = 0; 690 655 if (map->m_next_pgofs) ··· 696 657 goto unlock_out; 697 658 } 698 659 660 + prealloc = 0; 661 + ofs_in_node = dn.ofs_in_node; 699 662 end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 700 663 701 664 next_block: ··· 710 669 goto sync_out; 711 670 } 712 671 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 713 - if (blkaddr == NULL_ADDR) 714 - err = reserve_new_block(&dn); 672 + if (blkaddr == NULL_ADDR) { 673 + prealloc++; 674 + last_ofs_in_node = dn.ofs_in_node; 675 + } 715 676 } else { 716 677 err = __allocate_data_block(&dn); 678 + if (!err) { 679 + set_inode_flag(F2FS_I(inode), 680 + FI_APPEND_WRITE); 681 + allocated = true; 682 + } 717 683 } 718 684 if (err) 719 685 goto sync_out; 720 - allocated = true; 721 686 map->m_flags = F2FS_MAP_NEW; 722 687 blkaddr = dn.data_blkaddr; 723 688 } else { 689 + if (flag == F2FS_GET_BLOCK_BMAP) { 690 + map->m_pblk = 0; 691 + goto sync_out; 692 + } 724 693 if (flag == F2FS_GET_BLOCK_FIEMAP && 725 694 blkaddr == NULL_ADDR) { 726 695 if (map->m_next_pgofs) 727 696 *map->m_next_pgofs = pgofs + 1; 728 697 } 729 698 if (flag != F2FS_GET_BLOCK_FIEMAP || 730 - blkaddr != NEW_ADDR) { 731 - if (flag == F2FS_GET_BLOCK_BMAP) 732 - err = -ENOENT; 699 + blkaddr != 
NEW_ADDR) 733 700 goto sync_out; 734 - } 735 701 } 736 702 } 703 + 704 + if (flag == F2FS_GET_BLOCK_PRE_AIO) 705 + goto skip; 737 706 738 707 if (map->m_len == 0) { 739 708 /* preallocated unwritten block should be mapped for fiemap. */ ··· 756 705 } else if ((map->m_pblk != NEW_ADDR && 757 706 blkaddr == (map->m_pblk + ofs)) || 758 707 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || 759 - flag == F2FS_GET_BLOCK_PRE_DIO || 760 - flag == F2FS_GET_BLOCK_PRE_AIO) { 708 + flag == F2FS_GET_BLOCK_PRE_DIO) { 761 709 ofs++; 762 710 map->m_len++; 763 711 } else { 764 712 goto sync_out; 765 713 } 766 714 715 + skip: 767 716 dn.ofs_in_node++; 768 717 pgofs++; 769 718 770 - if (map->m_len < maxblocks) { 771 - if (dn.ofs_in_node < end_offset) 772 - goto next_block; 719 + /* preallocate blocks in batch for one dnode page */ 720 + if (flag == F2FS_GET_BLOCK_PRE_AIO && 721 + (pgofs == end || dn.ofs_in_node == end_offset)) { 773 722 774 - if (allocated) 775 - sync_inode_page(&dn); 776 - f2fs_put_dnode(&dn); 723 + dn.ofs_in_node = ofs_in_node; 724 + err = reserve_new_blocks(&dn, prealloc); 725 + if (err) 726 + goto sync_out; 777 727 778 - if (create) { 779 - f2fs_unlock_op(sbi); 780 - f2fs_balance_fs(sbi, allocated); 728 + map->m_len += dn.ofs_in_node - ofs_in_node; 729 + if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { 730 + err = -ENOSPC; 731 + goto sync_out; 781 732 } 782 - allocated = false; 783 - goto next_dnode; 733 + dn.ofs_in_node = end_offset; 784 734 } 735 + 736 + if (pgofs >= end) 737 + goto sync_out; 738 + else if (dn.ofs_in_node < end_offset) 739 + goto next_block; 740 + 741 + if (allocated) 742 + sync_inode_page(&dn); 743 + f2fs_put_dnode(&dn); 744 + 745 + if (create) { 746 + f2fs_unlock_op(sbi); 747 + f2fs_balance_fs(sbi, allocated); 748 + } 749 + allocated = false; 750 + goto next_dnode; 785 751 786 752 sync_out: 787 753 if (allocated) ··· 1051 983 */ 1052 984 if (bio && (last_block_in_bio != block_nr - 1)) { 1053 985 submit_and_realloc: 1054 - 
submit_bio(READ, bio); 986 + __submit_bio(F2FS_I_SB(inode), READ, bio); 1055 987 bio = NULL; 1056 988 } 1057 989 if (bio == NULL) { ··· 1094 1026 goto next_page; 1095 1027 confused: 1096 1028 if (bio) { 1097 - submit_bio(READ, bio); 1029 + __submit_bio(F2FS_I_SB(inode), READ, bio); 1098 1030 bio = NULL; 1099 1031 } 1100 1032 unlock_page(page); ··· 1104 1036 } 1105 1037 BUG_ON(pages && !list_empty(pages)); 1106 1038 if (bio) 1107 - submit_bio(READ, bio); 1039 + __submit_bio(F2FS_I_SB(inode), READ, bio); 1108 1040 return 0; 1109 1041 } 1110 1042 ··· 1245 1177 goto redirty_out; 1246 1178 if (f2fs_is_drop_cache(inode)) 1247 1179 goto out; 1248 - if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && 1249 - available_free_memory(sbi, BASE_CHECK)) 1180 + /* we should not write 0'th page having journal header */ 1181 + if (f2fs_is_volatile_file(inode) && (!page->index || 1182 + (!wbc->for_reclaim && 1183 + available_free_memory(sbi, BASE_CHECK)))) 1250 1184 goto redirty_out; 1251 1185 1252 1186 /* Dentry blocks are controlled by checkpoint */ ··· 1550 1480 if (pos + len <= MAX_INLINE_DATA) { 1551 1481 read_inline_data(page, ipage); 1552 1482 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); 1553 - set_inline_node(ipage); 1483 + if (inode->i_nlink) 1484 + set_inline_node(ipage); 1554 1485 } else { 1555 1486 err = f2fs_convert_inline_page(&dn, page); 1556 1487 if (err) ··· 1567 1496 } else { 1568 1497 /* hole case */ 1569 1498 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1570 - if (err || (!err && dn.data_blkaddr == NULL_ADDR)) { 1499 + if (err || dn.data_blkaddr == NULL_ADDR) { 1571 1500 f2fs_put_dnode(&dn); 1572 1501 f2fs_lock_op(sbi); 1573 1502 locked = true; ··· 1754 1683 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1755 1684 1756 1685 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); 1757 - if (err < 0 && iov_iter_rw(iter) == WRITE) 1758 - f2fs_write_failed(mapping, offset + count); 1686 + if (iov_iter_rw(iter) == WRITE) { 1687 
+ if (err > 0) 1688 + set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1689 + else if (err < 0) 1690 + f2fs_write_failed(mapping, offset + count); 1691 + } 1759 1692 1760 1693 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); 1761 1694 ··· 1789 1714 if (IS_ATOMIC_WRITTEN_PAGE(page)) 1790 1715 return; 1791 1716 1717 + set_page_private(page, 0); 1792 1718 ClearPagePrivate(page); 1793 1719 } 1794 1720 ··· 1803 1727 if (IS_ATOMIC_WRITTEN_PAGE(page)) 1804 1728 return 0; 1805 1729 1730 + set_page_private(page, 0); 1806 1731 ClearPagePrivate(page); 1807 1732 return 1; 1808 1733 }

+15 -10

fs/f2fs/debug.c

··· 48 48 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; 49 49 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 50 50 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 51 - si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); 51 + si->wb_bios = atomic_read(&sbi->nr_wb_bios); 52 52 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 53 53 si->rsvd_segs = reserved_segments(sbi); 54 54 si->overp_segs = overprovision_segments(sbi); ··· 58 58 si->inline_xattr = atomic_read(&sbi->inline_xattr); 59 59 si->inline_inode = atomic_read(&sbi->inline_inode); 60 60 si->inline_dir = atomic_read(&sbi->inline_dir); 61 + si->orphans = sbi->im[ORPHAN_INO].ino_num; 61 62 si->utilization = utilization(sbi); 62 63 63 64 si->free_segs = free_segments(sbi); ··· 144 143 si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; 145 144 si->base_mem += 2 * sizeof(struct f2fs_inode_info); 146 145 si->base_mem += sizeof(*sbi->ckpt); 146 + si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; 147 147 148 148 /* build sm */ 149 149 si->base_mem += sizeof(struct f2fs_sm_info); ··· 194 192 si->cache_mem += NM_I(sbi)->dirty_nat_cnt * 195 193 sizeof(struct nat_entry_set); 196 194 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); 197 - for (i = 0; i <= UPDATE_INO; i++) 195 + for (i = 0; i <= ORPHAN_INO; i++) 198 196 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 199 197 si->cache_mem += atomic_read(&sbi->total_ext_tree) * 200 198 sizeof(struct extent_tree); ··· 218 216 list_for_each_entry(si, &f2fs_stat_list, stat_list) { 219 217 update_general_status(si->sbi); 220 218 221 - seq_printf(s, "\n=====[ partition info(%pg). #%d ]=====\n", 222 - si->sbi->sb->s_bdev, i++); 219 + seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n", 220 + si->sbi->sb->s_bdev, i++, 221 + f2fs_readonly(si->sbi->sb) ? 
"RO": "RW"); 223 222 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", 224 223 si->sit_area_segs, si->nat_area_segs); 225 224 seq_printf(s, "[SSA: %d] [MAIN: %d", ··· 240 237 si->inline_inode); 241 238 seq_printf(s, " - Inline_dentry Inode: %u\n", 242 239 si->inline_dir); 240 + seq_printf(s, " - Orphan Inode: %u\n", 241 + si->orphans); 243 242 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", 244 243 si->main_area_segs, si->main_area_sections, 245 244 si->main_area_zones); ··· 300 295 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", 301 296 si->ext_tree, si->zombie_tree, si->ext_node); 302 297 seq_puts(s, "\nBalancing F2FS Async:\n"); 303 - seq_printf(s, " - inmem: %4d, wb: %4d\n", 304 - si->inmem_pages, si->wb_pages); 305 - seq_printf(s, " - nodes: %4d in %4d\n", 298 + seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n", 299 + si->inmem_pages, si->wb_bios); 300 + seq_printf(s, " - nodes: %4lld in %4d\n", 306 301 si->ndirty_node, si->node_pages); 307 - seq_printf(s, " - dents: %4d in dirs:%4d\n", 302 + seq_printf(s, " - dents: %4lld in dirs:%4d\n", 308 303 si->ndirty_dent, si->ndirty_dirs); 309 - seq_printf(s, " - datas: %4d in files:%4d\n", 304 + seq_printf(s, " - datas: %4lld in files:%4d\n", 310 305 si->ndirty_data, si->ndirty_files); 311 - seq_printf(s, " - meta: %4d in %4d\n", 306 + seq_printf(s, " - meta: %4lld in %4d\n", 312 307 si->ndirty_meta, si->meta_pages); 313 308 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", 314 309 si->dirty_nats, si->nats, si->dirty_sits, si->sits);

+69 -59

fs/f2fs/dir.c

··· 48 48 [F2FS_FT_SYMLINK] = DT_LNK, 49 49 }; 50 50 51 - #define S_SHIFT 12 52 51 static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { 53 52 [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, 54 53 [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, ··· 61 62 void set_de_type(struct f2fs_dir_entry *de, umode_t mode) 62 63 { 63 64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 65 + } 66 + 67 + unsigned char get_de_type(struct f2fs_dir_entry *de) 68 + { 69 + if (de->file_type < F2FS_FT_MAX) 70 + return f2fs_filetype_table[de->file_type]; 71 + return DT_UNKNOWN; 64 72 } 65 73 66 74 static unsigned long dir_block_index(unsigned int level, ··· 101 95 else 102 96 kunmap(dentry_page); 103 97 104 - /* 105 - * For the most part, it should be a bug when name_len is zero. 106 - * We stop here for figuring out where the bugs has occurred. 107 - */ 108 - f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); 109 98 return de; 110 99 } 111 100 ··· 125 124 126 125 de = &d->dentry[bit_pos]; 127 126 127 + if (unlikely(!de->name_len)) { 128 + bit_pos++; 129 + continue; 130 + } 131 + 128 132 /* encrypted case */ 129 133 de_name.name = d->filename[bit_pos]; 130 134 de_name.len = le16_to_cpu(de->name_len); ··· 146 140 if (max_slots && max_len > *max_slots) 147 141 *max_slots = max_len; 148 142 max_len = 0; 149 - 150 - /* remain bug on condition */ 151 - if (unlikely(!de->name_len)) 152 - d->max = -1; 153 143 154 144 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 155 145 } ··· 391 389 return page; 392 390 393 391 if (S_ISDIR(inode->i_mode)) { 392 + /* in order to handle error case */ 393 + get_page(page); 394 394 err = make_empty_dir(inode, dir, page); 395 - if (err) 396 - goto error; 395 + if (err) { 396 + lock_page(page); 397 + goto put_error; 398 + } 399 + put_page(page); 397 400 } 398 401 399 402 err = f2fs_init_acl(inode, dir, page, dpage); ··· 442 435 return page; 443 436 444 437 put_error: 445 - f2fs_put_page(page, 1); 446 - error: 447 - /* once the failed inode becomes a bad 
inode, i_mode is S_IFREG */ 438 + /* truncate empty dir pages */ 448 439 truncate_inode_pages(&inode->i_data, 0); 449 - truncate_blocks(inode, 0, false); 450 - remove_dirty_inode(inode); 451 - remove_inode_page(inode); 440 + 441 + clear_nlink(inode); 442 + update_inode(inode, page); 443 + f2fs_put_page(page, 1); 452 444 return ERR_PTR(err); 453 445 } 454 446 ··· 515 509 } 516 510 } 517 511 518 - /* 519 - * Caller should grab and release a rwsem by calling f2fs_lock_op() and 520 - * f2fs_unlock_op(). 521 - */ 522 - int __f2fs_add_link(struct inode *dir, const struct qstr *name, 512 + int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, 523 513 struct inode *inode, nid_t ino, umode_t mode) 524 514 { 525 515 unsigned int bit_pos; ··· 528 526 struct f2fs_dentry_block *dentry_blk = NULL; 529 527 struct f2fs_dentry_ptr d; 530 528 struct page *page = NULL; 531 - struct fscrypt_name fname; 532 - struct qstr new_name; 533 - int slots, err; 534 - 535 - err = fscrypt_setup_filename(dir, name, 0, &fname); 536 - if (err) 537 - return err; 538 - 539 - new_name.name = fname_name(&fname); 540 - new_name.len = fname_len(&fname); 541 - 542 - if (f2fs_has_inline_dentry(dir)) { 543 - err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); 544 - if (!err || err != -EAGAIN) 545 - goto out; 546 - else 547 - err = 0; 548 - } 529 + int slots, err = 0; 549 530 550 531 level = 0; 551 - slots = GET_DENTRY_SLOTS(new_name.len); 552 - dentry_hash = f2fs_dentry_hash(&new_name); 532 + slots = GET_DENTRY_SLOTS(new_name->len); 533 + dentry_hash = f2fs_dentry_hash(new_name); 553 534 554 535 current_depth = F2FS_I(dir)->i_current_depth; 555 536 if (F2FS_I(dir)->chash == dentry_hash) { ··· 541 556 } 542 557 543 558 start: 544 - if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { 545 - err = -ENOSPC; 546 - goto out; 547 - } 559 + #ifdef CONFIG_F2FS_FAULT_INJECTION 560 + if (time_to_inject(FAULT_DIR_DEPTH)) 561 + return -ENOSPC; 562 + #endif 563 + if (unlikely(current_depth 
== MAX_DIR_HASH_DEPTH)) 564 + return -ENOSPC; 548 565 549 566 /* Increase the depth, if required */ 550 567 if (level == current_depth) ··· 560 573 561 574 for (block = bidx; block <= (bidx + nblock - 1); block++) { 562 575 dentry_page = get_new_data_page(dir, NULL, block, true); 563 - if (IS_ERR(dentry_page)) { 564 - err = PTR_ERR(dentry_page); 565 - goto out; 566 - } 576 + if (IS_ERR(dentry_page)) 577 + return PTR_ERR(dentry_page); 567 578 568 579 dentry_blk = kmap(dentry_page); 569 580 bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, ··· 581 596 582 597 if (inode) { 583 598 down_write(&F2FS_I(inode)->i_sem); 584 - page = init_inode_metadata(inode, dir, &new_name, NULL); 599 + page = init_inode_metadata(inode, dir, new_name, NULL); 585 600 if (IS_ERR(page)) { 586 601 err = PTR_ERR(page); 587 602 goto fail; ··· 591 606 } 592 607 593 608 make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); 594 - f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); 609 + f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); 595 610 596 611 set_page_dirty(dentry_page); 597 612 ··· 613 628 } 614 629 kunmap(dentry_page); 615 630 f2fs_put_page(dentry_page, 1); 616 - out: 631 + 632 + return err; 633 + } 634 + 635 + /* 636 + * Caller should grab and release a rwsem by calling f2fs_lock_op() and 637 + * f2fs_unlock_op(). 
638 + */ 639 + int __f2fs_add_link(struct inode *dir, const struct qstr *name, 640 + struct inode *inode, nid_t ino, umode_t mode) 641 + { 642 + struct fscrypt_name fname; 643 + struct qstr new_name; 644 + int err; 645 + 646 + err = fscrypt_setup_filename(dir, name, 0, &fname); 647 + if (err) 648 + return err; 649 + 650 + new_name.name = fname_name(&fname); 651 + new_name.len = fname_len(&fname); 652 + 653 + err = -EAGAIN; 654 + if (f2fs_has_inline_dentry(dir)) 655 + err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); 656 + if (err == -EAGAIN) 657 + err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode); 658 + 617 659 fscrypt_free_filename(&fname); 618 660 f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); 619 661 return err; ··· 804 792 continue; 805 793 } 806 794 807 - if (de->file_type < F2FS_FT_MAX) 808 - d_type = f2fs_filetype_table[de->file_type]; 809 - else 810 - d_type = DT_UNKNOWN; 795 + d_type = get_de_type(de); 811 796 812 797 de_name.name = d->filename[bit_pos]; 813 798 de_name.len = le16_to_cpu(de->name_len); ··· 813 804 int save_len = fstr->len; 814 805 int ret; 815 806 816 - de_name.name = kmalloc(de_name.len, GFP_NOFS); 807 + de_name.name = f2fs_kmalloc(de_name.len, GFP_NOFS); 817 808 if (!de_name.name) 818 809 return false; 819 810 ··· 896 887 kunmap(dentry_page); 897 888 f2fs_put_page(dentry_page, 1); 898 889 } 890 + err = 0; 899 891 out: 900 892 fscrypt_fname_free_buffer(&fstr); 901 893 return err;

+1 -2

fs/f2fs/extent_cache.c

··· 196 196 if (!i_ext || !i_ext->len) 197 197 return false; 198 198 199 - set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 200 - le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); 199 + get_extent_info(&ei, i_ext); 201 200 202 201 write_lock(&et->lock); 203 202 if (atomic_read(&et->node_cnt))

+143 -54

fs/f2fs/f2fs.h

··· 37 37 } while (0) 38 38 #endif 39 39 40 + #ifdef CONFIG_F2FS_FAULT_INJECTION 41 + enum { 42 + FAULT_KMALLOC, 43 + FAULT_PAGE_ALLOC, 44 + FAULT_ALLOC_NID, 45 + FAULT_ORPHAN, 46 + FAULT_BLOCK, 47 + FAULT_DIR_DEPTH, 48 + FAULT_MAX, 49 + }; 50 + 51 + struct f2fs_fault_info { 52 + atomic_t inject_ops; 53 + unsigned int inject_rate; 54 + unsigned int inject_type; 55 + }; 56 + 57 + extern struct f2fs_fault_info f2fs_fault; 58 + extern char *fault_name[FAULT_MAX]; 59 + #define IS_FAULT_SET(type) (f2fs_fault.inject_type & (1 << (type))) 60 + 61 + static inline bool time_to_inject(int type) 62 + { 63 + if (!f2fs_fault.inject_rate) 64 + return false; 65 + if (type == FAULT_KMALLOC && !IS_FAULT_SET(type)) 66 + return false; 67 + else if (type == FAULT_PAGE_ALLOC && !IS_FAULT_SET(type)) 68 + return false; 69 + else if (type == FAULT_ALLOC_NID && !IS_FAULT_SET(type)) 70 + return false; 71 + else if (type == FAULT_ORPHAN && !IS_FAULT_SET(type)) 72 + return false; 73 + else if (type == FAULT_BLOCK && !IS_FAULT_SET(type)) 74 + return false; 75 + else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type)) 76 + return false; 77 + 78 + atomic_inc(&f2fs_fault.inject_ops); 79 + if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) { 80 + atomic_set(&f2fs_fault.inject_ops, 0); 81 + printk("%sF2FS-fs : inject %s in %pF\n", 82 + KERN_INFO, 83 + fault_name[type], 84 + __builtin_return_address(0)); 85 + return true; 86 + } 87 + return false; 88 + } 89 + #endif 90 + 40 91 /* 41 92 * For mount options 42 93 */ ··· 107 56 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 108 57 #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 109 58 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 59 + #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 110 60 111 61 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 112 62 #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) ··· 211 159 struct inode *inode; /* vfs inode pointer */ 212 160 block_t blkaddr; /* block address 
locating the last fsync */ 213 161 block_t last_dentry; /* block address locating the last dentry */ 214 - block_t last_inode; /* block address locating the last inode */ 215 162 }; 216 163 217 164 #define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) ··· 436 385 /* Use below internally in f2fs*/ 437 386 unsigned long flags; /* use to pass per-file flags */ 438 387 struct rw_semaphore i_sem; /* protect fi info */ 439 - atomic_t dirty_pages; /* # of dirty pages */ 388 + struct percpu_counter dirty_pages; /* # of dirty pages */ 440 389 f2fs_hash_t chash; /* hash value of given file name */ 441 390 unsigned int clevel; /* maximum level of given file name */ 442 391 nid_t i_xattr_nid; /* node id that contains xattrs */ ··· 449 398 }; 450 399 451 400 static inline void get_extent_info(struct extent_info *ext, 452 - struct f2fs_extent i_ext) 401 + struct f2fs_extent *i_ext) 453 402 { 454 - ext->fofs = le32_to_cpu(i_ext.fofs); 455 - ext->blk = le32_to_cpu(i_ext.blk); 456 - ext->len = le32_to_cpu(i_ext.len); 403 + ext->fofs = le32_to_cpu(i_ext->fofs); 404 + ext->blk = le32_to_cpu(i_ext->blk); 405 + ext->len = le32_to_cpu(i_ext->len); 457 406 } 458 407 459 408 static inline void set_raw_extent(struct extent_info *ext, ··· 650 599 * dirty dentry blocks, dirty node blocks, and dirty meta blocks. 
651 600 */ 652 601 enum count_type { 653 - F2FS_WRITEBACK, 654 602 F2FS_DIRTY_DENTS, 655 603 F2FS_DIRTY_DATA, 656 604 F2FS_DIRTY_NODES, ··· 722 672 SBI_IS_CLOSE, /* specify unmounting */ 723 673 SBI_NEED_FSCK, /* need fsck.f2fs to fix */ 724 674 SBI_POR_DOING, /* recovery is doing or not */ 675 + SBI_NEED_SB_WRITE, /* need to recover superblock */ 725 676 }; 726 677 727 678 enum { ··· 731 680 MAX_TIME, 732 681 }; 733 682 683 + #ifdef CONFIG_F2FS_FS_ENCRYPTION 684 + #define F2FS_KEY_DESC_PREFIX "f2fs:" 685 + #define F2FS_KEY_DESC_PREFIX_SIZE 5 686 + #endif 734 687 struct f2fs_sb_info { 735 688 struct super_block *sb; /* pointer to VFS super block */ 736 689 struct proc_dir_entry *s_proc; /* proc entry */ ··· 742 687 int valid_super_block; /* valid super block no */ 743 688 int s_flag; /* flags for sbi */ 744 689 690 + #ifdef CONFIG_F2FS_FS_ENCRYPTION 691 + u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE]; 692 + u8 key_prefix_size; 693 + #endif 745 694 /* for node-related operations */ 746 695 struct f2fs_nm_info *nm_info; /* node manager */ 747 696 struct inode *node_inode; /* cache node blocks */ ··· 801 742 unsigned int total_sections; /* total section count */ 802 743 unsigned int total_node_count; /* total node block count */ 803 744 unsigned int total_valid_node_count; /* valid node block count */ 804 - unsigned int total_valid_inode_count; /* valid inode count */ 805 745 loff_t max_file_blocks; /* max block index of file */ 806 746 int active_logs; /* # of active logs */ 807 747 int dir_level; /* directory level */ 808 748 809 749 block_t user_block_count; /* # of user blocks */ 810 750 block_t total_valid_block_count; /* # of valid blocks */ 811 - block_t alloc_valid_block_count; /* # of allocated blocks */ 812 751 block_t discard_blks; /* discard command candidats */ 813 752 block_t last_valid_block_count; /* for recovery */ 814 753 u32 s_next_generation; /* for NFS support */ 815 - atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ 754 + atomic_t 
nr_wb_bios; /* # of writeback bios */ 755 + 756 + /* # of pages, see count_type */ 757 + struct percpu_counter nr_pages[NR_COUNT_TYPE]; 758 + /* # of allocated blocks */ 759 + struct percpu_counter alloc_valid_block_count; 760 + 761 + /* valid inode count */ 762 + struct percpu_counter total_valid_inode_count; 816 763 817 764 struct f2fs_mount_info mount_opt; /* mount options */ 818 765 ··· 1120 1055 } 1121 1056 1122 1057 static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 1123 - struct inode *inode, blkcnt_t count) 1058 + struct inode *inode, blkcnt_t *count) 1124 1059 { 1125 1060 block_t valid_block_count; 1126 1061 1127 1062 spin_lock(&sbi->stat_lock); 1128 - valid_block_count = 1129 - sbi->total_valid_block_count + (block_t)count; 1130 - if (unlikely(valid_block_count > sbi->user_block_count)) { 1063 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1064 + if (time_to_inject(FAULT_BLOCK)) { 1131 1065 spin_unlock(&sbi->stat_lock); 1132 1066 return false; 1133 1067 } 1134 - inode->i_blocks += count; 1135 - sbi->total_valid_block_count = valid_block_count; 1136 - sbi->alloc_valid_block_count += (block_t)count; 1068 + #endif 1069 + valid_block_count = 1070 + sbi->total_valid_block_count + (block_t)(*count); 1071 + if (unlikely(valid_block_count > sbi->user_block_count)) { 1072 + *count = sbi->user_block_count - sbi->total_valid_block_count; 1073 + if (!*count) { 1074 + spin_unlock(&sbi->stat_lock); 1075 + return false; 1076 + } 1077 + } 1078 + /* *count can be recalculated */ 1079 + inode->i_blocks += *count; 1080 + sbi->total_valid_block_count = 1081 + sbi->total_valid_block_count + (block_t)(*count); 1137 1082 spin_unlock(&sbi->stat_lock); 1083 + 1084 + percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); 1138 1085 return true; 1139 1086 } 1140 1087 ··· 1164 1087 1165 1088 static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) 1166 1089 { 1167 - atomic_inc(&sbi->nr_pages[count_type]); 1090 + 
percpu_counter_inc(&sbi->nr_pages[count_type]); 1168 1091 set_sbi_flag(sbi, SBI_IS_DIRTY); 1169 1092 } 1170 1093 1171 1094 static inline void inode_inc_dirty_pages(struct inode *inode) 1172 1095 { 1173 - atomic_inc(&F2FS_I(inode)->dirty_pages); 1096 + percpu_counter_inc(&F2FS_I(inode)->dirty_pages); 1174 1097 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1175 1098 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1176 1099 } 1177 1100 1178 1101 static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 1179 1102 { 1180 - atomic_dec(&sbi->nr_pages[count_type]); 1103 + percpu_counter_dec(&sbi->nr_pages[count_type]); 1181 1104 } 1182 1105 1183 1106 static inline void inode_dec_dirty_pages(struct inode *inode) ··· 1186 1109 !S_ISLNK(inode->i_mode)) 1187 1110 return; 1188 1111 1189 - atomic_dec(&F2FS_I(inode)->dirty_pages); 1112 + percpu_counter_dec(&F2FS_I(inode)->dirty_pages); 1190 1113 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1191 1114 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1192 1115 } 1193 1116 1194 - static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) 1117 + static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) 1195 1118 { 1196 - return atomic_read(&sbi->nr_pages[count_type]); 1119 + return percpu_counter_sum_positive(&sbi->nr_pages[count_type]); 1197 1120 } 1198 1121 1199 - static inline int get_dirty_pages(struct inode *inode) 1122 + static inline s64 get_dirty_pages(struct inode *inode) 1200 1123 { 1201 - return atomic_read(&F2FS_I(inode)->dirty_pages); 1124 + return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages); 1202 1125 } 1203 1126 1204 1127 static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 1205 1128 { 1206 1129 unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; 1207 - return ((get_pages(sbi, block_type) + pages_per_sec - 1) 1208 - >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; 1130 + unsigned int segs = (get_pages(sbi, block_type) + 
pages_per_sec - 1) >> 1131 + sbi->log_blocks_per_seg; 1132 + 1133 + return segs / sbi->segs_per_sec; 1209 1134 } 1210 1135 1211 1136 static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) ··· 1296 1217 if (inode) 1297 1218 inode->i_blocks++; 1298 1219 1299 - sbi->alloc_valid_block_count++; 1300 1220 sbi->total_valid_node_count++; 1301 1221 sbi->total_valid_block_count++; 1302 1222 spin_unlock(&sbi->stat_lock); 1303 1223 1224 + percpu_counter_inc(&sbi->alloc_valid_block_count); 1304 1225 return true; 1305 1226 } 1306 1227 ··· 1327 1248 1328 1249 static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 1329 1250 { 1330 - spin_lock(&sbi->stat_lock); 1331 - f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count); 1332 - sbi->total_valid_inode_count++; 1333 - spin_unlock(&sbi->stat_lock); 1251 + percpu_counter_inc(&sbi->total_valid_inode_count); 1334 1252 } 1335 1253 1336 1254 static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) 1337 1255 { 1338 - spin_lock(&sbi->stat_lock); 1339 - f2fs_bug_on(sbi, !sbi->total_valid_inode_count); 1340 - sbi->total_valid_inode_count--; 1341 - spin_unlock(&sbi->stat_lock); 1256 + percpu_counter_dec(&sbi->total_valid_inode_count); 1342 1257 } 1343 1258 1344 - static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 1259 + static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) 1345 1260 { 1346 - return sbi->total_valid_inode_count; 1261 + return percpu_counter_sum_positive(&sbi->total_valid_inode_count); 1347 1262 } 1348 1263 1349 1264 static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, 1350 1265 pgoff_t index, bool for_write) 1351 1266 { 1267 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1268 + struct page *page = find_lock_page(mapping, index); 1269 + if (page) 1270 + return page; 1271 + 1272 + if (time_to_inject(FAULT_PAGE_ALLOC)) 1273 + return NULL; 1274 + #endif 1352 1275 if (!for_write) 1353 1276 return grab_cache_page(mapping, index); 1354 1277 
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); ··· 1516 1435 FI_NO_ALLOC, /* should not allocate any blocks */ 1517 1436 FI_FREE_NID, /* free allocated nide */ 1518 1437 FI_UPDATE_DIR, /* should update inode block for consistency */ 1519 - FI_DELAY_IPUT, /* used for the recovery */ 1520 1438 FI_NO_EXTENT, /* not to use the extent cache */ 1521 1439 FI_INLINE_XATTR, /* used for inline xattr */ 1522 1440 FI_INLINE_DATA, /* used for inline data*/ ··· 1698 1618 return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1699 1619 } 1700 1620 1701 - static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) 1702 - { 1703 - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1704 - sbi->sb->s_flags |= MS_RDONLY; 1705 - } 1706 - 1707 1621 static inline bool is_dot_dotdot(const struct qstr *str) 1708 1622 { 1709 1623 if (str->len == 1 && str->name[0] == '.') ··· 1716 1642 return false; 1717 1643 1718 1644 return S_ISREG(inode->i_mode); 1645 + } 1646 + 1647 + static inline void *f2fs_kmalloc(size_t size, gfp_t flags) 1648 + { 1649 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1650 + if (time_to_inject(FAULT_KMALLOC)) 1651 + return NULL; 1652 + #endif 1653 + return kmalloc(size, flags); 1719 1654 } 1720 1655 1721 1656 static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) ··· 1793 1710 */ 1794 1711 extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; 1795 1712 void set_de_type(struct f2fs_dir_entry *, umode_t); 1796 - 1713 + unsigned char get_de_type(struct f2fs_dir_entry *); 1797 1714 struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, 1798 1715 f2fs_hash_t, int *, struct f2fs_dentry_ptr *); 1799 1716 bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, ··· 1814 1731 int update_dent_inode(struct inode *, struct inode *, const struct qstr *); 1815 1732 void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, 1816 1733 const struct qstr *, f2fs_hash_t , unsigned int); 1734 + int f2fs_add_regular_entry(struct inode *, 
const struct qstr *, 1735 + struct inode *, nid_t, umode_t); 1817 1736 int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, 1818 1737 umode_t); 1819 1738 void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, ··· 1866 1781 struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1867 1782 struct page *get_node_page_ra(struct page *, int); 1868 1783 void sync_inode_page(struct dnode_of_data *); 1869 - int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); 1784 + void move_node_page(struct page *, int); 1785 + int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, 1786 + bool); 1787 + int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); 1870 1788 bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1871 1789 void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1872 1790 void alloc_nid_failed(struct f2fs_sb_info *, nid_t); ··· 1931 1843 /* 1932 1844 * checkpoint.c 1933 1845 */ 1846 + void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool); 1934 1847 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1935 1848 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1936 1849 struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); ··· 1941 1852 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1942 1853 void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); 1943 1854 void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); 1944 - void release_ino_entry(struct f2fs_sb_info *); 1855 + void release_ino_entry(struct f2fs_sb_info *, bool); 1945 1856 bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1946 1857 int acquire_orphan_inode(struct f2fs_sb_info *); 1947 1858 void release_orphan_inode(struct f2fs_sb_info *); ··· 1950 1861 int recover_orphan_inodes(struct f2fs_sb_info *); 1951 1862 int get_valid_checkpoint(struct f2fs_sb_info *); 1952 1863 void update_dirty_page(struct inode *, struct page *); 1953 - 
void add_dirty_dir_inode(struct inode *); 1954 1864 void remove_dirty_inode(struct inode *); 1955 1865 int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); 1956 1866 int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); ··· 1968 1880 void f2fs_submit_page_mbio(struct f2fs_io_info *); 1969 1881 void set_data_blkaddr(struct dnode_of_data *); 1970 1882 void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); 1883 + int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); 1971 1884 int reserve_new_block(struct dnode_of_data *); 1972 1885 int f2fs_get_block(struct dnode_of_data *, pgoff_t); 1973 1886 ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); ··· 1995 1906 /* 1996 1907 * recovery.c 1997 1908 */ 1998 - int recover_fsync_data(struct f2fs_sb_info *); 1909 + int recover_fsync_data(struct f2fs_sb_info *, bool); 1999 1910 bool space_for_roll_forward(struct f2fs_sb_info *); 2000 1911 2001 1912 /* ··· 2010 1921 unsigned long long hit_largest, hit_cached, hit_rbtree; 2011 1922 unsigned long long hit_total, total_ext; 2012 1923 int ext_tree, zombie_tree, ext_node; 2013 - int ndirty_node, ndirty_meta; 2014 - int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; 1924 + s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; 1925 + unsigned int ndirty_dirs, ndirty_files; 2015 1926 int nats, dirty_nats, sits, dirty_sits, fnids; 2016 1927 int total_count, utilization; 2017 - int bg_gc, inmem_pages, wb_pages; 2018 - int inline_xattr, inline_inode, inline_dir; 1928 + int bg_gc, wb_bios; 1929 + int inline_xattr, inline_inode, inline_dir, orphans; 2019 1930 unsigned int valid_count, valid_node_count, valid_inode_count; 2020 1931 unsigned int bimodal, avg_vblocks; 2021 1932 int util_free, util_valid, util_invalid;

+223 -90

fs/f2fs/file.c

··· 182 182 } 183 183 } 184 184 185 - int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 185 + static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, 186 + int datasync, bool atomic) 186 187 { 187 188 struct inode *inode = file->f_mapping->host; 188 189 struct f2fs_inode_info *fi = F2FS_I(inode); ··· 257 256 goto out; 258 257 } 259 258 sync_nodes: 260 - sync_node_pages(sbi, ino, &wbc); 259 + ret = fsync_node_pages(sbi, ino, &wbc, atomic); 260 + if (ret) 261 + goto out; 261 262 262 263 /* if cp_error was enabled, we should avoid infinite loop */ 263 264 if (unlikely(f2fs_cp_error(sbi))) { ··· 289 286 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 290 287 f2fs_trace_ios(NULL, 1); 291 288 return ret; 289 + } 290 + 291 + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 292 + { 293 + return f2fs_do_sync_file(file, start, end, datasync, false); 292 294 } 293 295 294 296 static pgoff_t __get_first_dirty_index(struct address_space *mapping, ··· 563 555 564 556 free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); 565 557 558 + if (free_from >= sbi->max_file_blocks) 559 + goto free_partial; 560 + 566 561 if (lock) 567 562 f2fs_lock_op(sbi); 568 563 ··· 584 573 } 585 574 586 575 set_new_dnode(&dn, inode, ipage, NULL, 0); 587 - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 576 + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); 588 577 if (err) { 589 578 if (err == -ENOENT) 590 579 goto free_next; ··· 607 596 out: 608 597 if (lock) 609 598 f2fs_unlock_op(sbi); 610 - 599 + free_partial: 611 600 /* lastly zero out the first data page */ 612 601 if (!err) 613 602 err = truncate_partial_data_page(inode, from, truncate_page); ··· 997 986 return ret; 998 987 } 999 988 989 + static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, 990 + pgoff_t end) 991 + { 992 + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 993 + pgoff_t index = start; 994 + unsigned int 
ofs_in_node = dn->ofs_in_node; 995 + blkcnt_t count = 0; 996 + int ret; 997 + 998 + for (; index < end; index++, dn->ofs_in_node++) { 999 + if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR) 1000 + count++; 1001 + } 1002 + 1003 + dn->ofs_in_node = ofs_in_node; 1004 + ret = reserve_new_blocks(dn, count); 1005 + if (ret) 1006 + return ret; 1007 + 1008 + dn->ofs_in_node = ofs_in_node; 1009 + for (index = start; index < end; index++, dn->ofs_in_node++) { 1010 + dn->data_blkaddr = 1011 + datablock_addr(dn->node_page, dn->ofs_in_node); 1012 + /* 1013 + * reserve_new_blocks will not guarantee entire block 1014 + * allocation. 1015 + */ 1016 + if (dn->data_blkaddr == NULL_ADDR) { 1017 + ret = -ENOSPC; 1018 + break; 1019 + } 1020 + if (dn->data_blkaddr != NEW_ADDR) { 1021 + invalidate_blocks(sbi, dn->data_blkaddr); 1022 + dn->data_blkaddr = NEW_ADDR; 1023 + set_data_blkaddr(dn); 1024 + } 1025 + } 1026 + 1027 + f2fs_update_extent_cache_range(dn, start, 0, index - start); 1028 + 1029 + return ret; 1030 + } 1031 + 1000 1032 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1001 1033 int mode) 1002 1034 { ··· 1090 1036 (loff_t)pg_start << PAGE_SHIFT); 1091 1037 } 1092 1038 1093 - for (index = pg_start; index < pg_end; index++) { 1039 + for (index = pg_start; index < pg_end;) { 1094 1040 struct dnode_of_data dn; 1095 - struct page *ipage; 1041 + unsigned int end_offset; 1042 + pgoff_t end; 1096 1043 1097 1044 f2fs_lock_op(sbi); 1098 1045 1099 - ipage = get_node_page(sbi, inode->i_ino); 1100 - if (IS_ERR(ipage)) { 1101 - ret = PTR_ERR(ipage); 1102 - f2fs_unlock_op(sbi); 1103 - goto out; 1104 - } 1105 - 1106 - set_new_dnode(&dn, inode, ipage, NULL, 0); 1107 - ret = f2fs_reserve_block(&dn, index); 1046 + set_new_dnode(&dn, inode, NULL, NULL, 0); 1047 + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); 1108 1048 if (ret) { 1109 1049 f2fs_unlock_op(sbi); 1110 1050 goto out; 1111 1051 } 1112 1052 1113 - if (dn.data_blkaddr != NEW_ADDR) { 1114 - 
invalidate_blocks(sbi, dn.data_blkaddr); 1115 - f2fs_update_data_blkaddr(&dn, NEW_ADDR); 1116 - } 1053 + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 1054 + end = min(pg_end, end_offset - dn.ofs_in_node + index); 1055 + 1056 + ret = f2fs_do_zero_range(&dn, index, end); 1117 1057 f2fs_put_dnode(&dn); 1118 1058 f2fs_unlock_op(sbi); 1059 + if (ret) 1060 + goto out; 1119 1061 1062 + index = end; 1120 1063 new_size = max_t(loff_t, new_size, 1121 - (loff_t)(index + 1) << PAGE_SHIFT); 1064 + (loff_t)index << PAGE_SHIFT); 1122 1065 } 1123 1066 1124 1067 if (off_end) { ··· 1198 1147 loff_t len, int mode) 1199 1148 { 1200 1149 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1201 - pgoff_t index, pg_start, pg_end; 1150 + struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; 1151 + pgoff_t pg_end; 1202 1152 loff_t new_size = i_size_read(inode); 1203 - loff_t off_start, off_end; 1204 - int ret = 0; 1153 + loff_t off_end; 1154 + int ret; 1205 1155 1206 1156 ret = inode_newsize_ok(inode, (len + offset)); 1207 1157 if (ret) ··· 1214 1162 1215 1163 f2fs_balance_fs(sbi, true); 1216 1164 1217 - pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1218 - pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; 1219 - 1220 - off_start = offset & (PAGE_SIZE - 1); 1165 + pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; 1221 1166 off_end = (offset + len) & (PAGE_SIZE - 1); 1222 1167 1223 - f2fs_lock_op(sbi); 1168 + map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT; 1169 + map.m_len = pg_end - map.m_lblk; 1170 + if (off_end) 1171 + map.m_len++; 1224 1172 1225 - for (index = pg_start; index <= pg_end; index++) { 1226 - struct dnode_of_data dn; 1173 + ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); 1174 + if (ret) { 1175 + pgoff_t last_off; 1227 1176 1228 - if (index == pg_end && !off_end) 1229 - goto noalloc; 1177 + if (!map.m_len) 1178 + return ret; 1230 1179 1231 - set_new_dnode(&dn, inode, NULL, NULL, 0); 1232 - ret = f2fs_reserve_block(&dn, 
index); 1233 - if (ret) 1234 - break; 1235 - noalloc: 1236 - if (pg_start == pg_end) 1237 - new_size = offset + len; 1238 - else if (index == pg_start && off_start) 1239 - new_size = (loff_t)(index + 1) << PAGE_SHIFT; 1240 - else if (index == pg_end) 1241 - new_size = ((loff_t)index << PAGE_SHIFT) + 1242 - off_end; 1243 - else 1244 - new_size += PAGE_SIZE; 1180 + last_off = map.m_lblk + map.m_len - 1; 1181 + 1182 + /* update new size to the failed position */ 1183 + new_size = (last_off == pg_end) ? offset + len: 1184 + (loff_t)(last_off + 1) << PAGE_SHIFT; 1185 + } else { 1186 + new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1245 1187 } 1246 1188 1247 - if (!(mode & FALLOC_FL_KEEP_SIZE) && 1248 - i_size_read(inode) < new_size) { 1189 + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { 1249 1190 i_size_write(inode, new_size); 1250 1191 mark_inode_dirty(inode); 1251 1192 update_inode_page(inode); 1252 1193 } 1253 - f2fs_unlock_op(sbi); 1254 1194 1255 1195 return ret; 1256 1196 } ··· 1298 1254 1299 1255 static int f2fs_release_file(struct inode *inode, struct file *filp) 1300 1256 { 1257 + /* 1258 + * f2fs_relase_file is called at every close calls. So we should 1259 + * not drop any inmemory pages by close called by other process. 
1260 + */ 1261 + if (!(filp->f_mode & FMODE_WRITE) || 1262 + atomic_read(&inode->i_writecount) != 1) 1263 + return 0; 1264 + 1301 1265 /* some remained atomic pages should discarded */ 1302 1266 if (f2fs_is_atomic_file(inode)) 1303 1267 drop_inmem_pages(inode); 1304 1268 if (f2fs_is_volatile_file(inode)) { 1269 + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1305 1270 set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); 1306 1271 filemap_fdatawrite(inode->i_mapping); 1307 1272 clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); ··· 1347 1294 unsigned int oldflags; 1348 1295 int ret; 1349 1296 1297 + if (!inode_owner_or_capable(inode)) 1298 + return -EACCES; 1299 + 1300 + if (get_user(flags, (int __user *)arg)) 1301 + return -EFAULT; 1302 + 1350 1303 ret = mnt_want_write_file(filp); 1351 1304 if (ret) 1352 1305 return ret; 1353 - 1354 - if (!inode_owner_or_capable(inode)) { 1355 - ret = -EACCES; 1356 - goto out; 1357 - } 1358 - 1359 - if (get_user(flags, (int __user *)arg)) { 1360 - ret = -EFAULT; 1361 - goto out; 1362 - } 1363 1306 1364 1307 flags = f2fs_mask_flags(inode->i_mode, flags); 1365 1308 ··· 1399 1350 if (!inode_owner_or_capable(inode)) 1400 1351 return -EACCES; 1401 1352 1353 + ret = mnt_want_write_file(filp); 1354 + if (ret) 1355 + return ret; 1356 + 1357 + inode_lock(inode); 1358 + 1402 1359 if (f2fs_is_atomic_file(inode)) 1403 - return 0; 1360 + goto out; 1404 1361 1405 1362 ret = f2fs_convert_inline_inode(inode); 1406 1363 if (ret) 1407 - return ret; 1364 + goto out; 1408 1365 1409 1366 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1410 1367 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1411 1368 1412 - return 0; 1369 + if (!get_dirty_pages(inode)) 1370 + goto out; 1371 + 1372 + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, 1373 + "Unexpected flush for atomic writes: ino=%lu, npages=%lld", 1374 + inode->i_ino, get_dirty_pages(inode)); 1375 + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 1376 + if (ret) 1377 + 
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1378 + out: 1379 + inode_unlock(inode); 1380 + mnt_drop_write_file(filp); 1381 + return ret; 1413 1382 } 1414 1383 1415 1384 static int f2fs_ioc_commit_atomic_write(struct file *filp) ··· 1438 1371 if (!inode_owner_or_capable(inode)) 1439 1372 return -EACCES; 1440 1373 1441 - if (f2fs_is_volatile_file(inode)) 1442 - return 0; 1443 - 1444 1374 ret = mnt_want_write_file(filp); 1445 1375 if (ret) 1446 1376 return ret; 1377 + 1378 + inode_lock(inode); 1379 + 1380 + if (f2fs_is_volatile_file(inode)) 1381 + goto err_out; 1447 1382 1448 1383 if (f2fs_is_atomic_file(inode)) { 1449 1384 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); ··· 1456 1387 } 1457 1388 } 1458 1389 1459 - ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1390 + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 1460 1391 err_out: 1392 + inode_unlock(inode); 1461 1393 mnt_drop_write_file(filp); 1462 1394 return ret; 1463 1395 } ··· 1471 1401 if (!inode_owner_or_capable(inode)) 1472 1402 return -EACCES; 1473 1403 1474 - if (f2fs_is_volatile_file(inode)) 1475 - return 0; 1476 - 1477 - ret = f2fs_convert_inline_inode(inode); 1404 + ret = mnt_want_write_file(filp); 1478 1405 if (ret) 1479 1406 return ret; 1480 1407 1408 + inode_lock(inode); 1409 + 1410 + if (f2fs_is_volatile_file(inode)) 1411 + goto out; 1412 + 1413 + ret = f2fs_convert_inline_inode(inode); 1414 + if (ret) 1415 + goto out; 1416 + 1481 1417 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1482 1418 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1483 - return 0; 1419 + out: 1420 + inode_unlock(inode); 1421 + mnt_drop_write_file(filp); 1422 + return ret; 1484 1423 } 1485 1424 1486 1425 static int f2fs_ioc_release_volatile_write(struct file *filp) 1487 1426 { 1488 1427 struct inode *inode = file_inode(filp); 1428 + int ret; 1489 1429 1490 1430 if (!inode_owner_or_capable(inode)) 1491 1431 return -EACCES; 1492 1432 1433 + ret = mnt_want_write_file(filp); 1434 + if (ret) 1435 + return ret; 1436 + 
1437 + inode_lock(inode); 1438 + 1493 1439 if (!f2fs_is_volatile_file(inode)) 1494 - return 0; 1440 + goto out; 1495 1441 1496 - if (!f2fs_is_first_block_written(inode)) 1497 - return truncate_partial_data_page(inode, 0, true); 1442 + if (!f2fs_is_first_block_written(inode)) { 1443 + ret = truncate_partial_data_page(inode, 0, true); 1444 + goto out; 1445 + } 1498 1446 1499 - return punch_hole(inode, 0, F2FS_BLKSIZE); 1447 + ret = punch_hole(inode, 0, F2FS_BLKSIZE); 1448 + out: 1449 + inode_unlock(inode); 1450 + mnt_drop_write_file(filp); 1451 + return ret; 1500 1452 } 1501 1453 1502 1454 static int f2fs_ioc_abort_volatile_write(struct file *filp) ··· 1533 1441 if (ret) 1534 1442 return ret; 1535 1443 1536 - if (f2fs_is_atomic_file(inode)) { 1537 - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1444 + inode_lock(inode); 1445 + 1446 + if (f2fs_is_atomic_file(inode)) 1538 1447 drop_inmem_pages(inode); 1539 - } 1540 1448 if (f2fs_is_volatile_file(inode)) { 1541 1449 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1542 - ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1450 + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 1543 1451 } 1452 + 1453 + inode_unlock(inode); 1544 1454 1545 1455 mnt_drop_write_file(filp); 1546 1456 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); ··· 1555 1461 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1556 1462 struct super_block *sb = sbi->sb; 1557 1463 __u32 in; 1464 + int ret; 1558 1465 1559 1466 if (!capable(CAP_SYS_ADMIN)) 1560 1467 return -EPERM; ··· 1563 1468 if (get_user(in, (__u32 __user *)arg)) 1564 1469 return -EFAULT; 1565 1470 1471 + ret = mnt_want_write_file(filp); 1472 + if (ret) 1473 + return ret; 1474 + 1566 1475 switch (in) { 1567 1476 case F2FS_GOING_DOWN_FULLSYNC: 1568 1477 sb = freeze_bdev(sb->s_bdev); 1569 1478 if (sb && !IS_ERR(sb)) { 1570 - f2fs_stop_checkpoint(sbi); 1479 + f2fs_stop_checkpoint(sbi, false); 1571 1480 thaw_bdev(sb->s_bdev, sb); 1572 1481 } 1573 1482 break; 1574 1483 case F2FS_GOING_DOWN_METASYNC: 
1575 1484 /* do checkpoint only */ 1576 1485 f2fs_sync_fs(sb, 1); 1577 - f2fs_stop_checkpoint(sbi); 1486 + f2fs_stop_checkpoint(sbi, false); 1578 1487 break; 1579 1488 case F2FS_GOING_DOWN_NOSYNC: 1580 - f2fs_stop_checkpoint(sbi); 1489 + f2fs_stop_checkpoint(sbi, false); 1581 1490 break; 1582 1491 case F2FS_GOING_DOWN_METAFLUSH: 1583 1492 sync_meta_pages(sbi, META, LONG_MAX); 1584 - f2fs_stop_checkpoint(sbi); 1493 + f2fs_stop_checkpoint(sbi, false); 1585 1494 break; 1586 1495 default: 1587 - return -EINVAL; 1496 + ret = -EINVAL; 1497 + goto out; 1588 1498 } 1589 1499 f2fs_update_time(sbi, REQ_TIME); 1590 - return 0; 1500 + out: 1501 + mnt_drop_write_file(filp); 1502 + return ret; 1591 1503 } 1592 1504 1593 1505 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) ··· 1615 1513 sizeof(range))) 1616 1514 return -EFAULT; 1617 1515 1516 + ret = mnt_want_write_file(filp); 1517 + if (ret) 1518 + return ret; 1519 + 1618 1520 range.minlen = max((unsigned int)range.minlen, 1619 1521 q->limits.discard_granularity); 1620 1522 ret = f2fs_trim_fs(F2FS_SB(sb), &range); 1523 + mnt_drop_write_file(filp); 1621 1524 if (ret < 0) 1622 1525 return ret; 1623 1526 ··· 1647 1540 { 1648 1541 struct fscrypt_policy policy; 1649 1542 struct inode *inode = file_inode(filp); 1543 + int ret; 1650 1544 1651 1545 if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, 1652 1546 sizeof(policy))) 1653 1547 return -EFAULT; 1654 1548 1549 + ret = mnt_want_write_file(filp); 1550 + if (ret) 1551 + return ret; 1552 + 1655 1553 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1656 - return fscrypt_process_policy(inode, &policy); 1554 + ret = fscrypt_process_policy(inode, &policy); 1555 + 1556 + mnt_drop_write_file(filp); 1557 + return ret; 1657 1558 } 1658 1559 1659 1560 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) ··· 1718 1603 struct inode *inode = file_inode(filp); 1719 1604 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1720 1605 __u32 sync; 1606 + 
int ret; 1721 1607 1722 1608 if (!capable(CAP_SYS_ADMIN)) 1723 1609 return -EPERM; ··· 1729 1613 if (f2fs_readonly(sbi->sb)) 1730 1614 return -EROFS; 1731 1615 1616 + ret = mnt_want_write_file(filp); 1617 + if (ret) 1618 + return ret; 1619 + 1732 1620 if (!sync) { 1733 - if (!mutex_trylock(&sbi->gc_mutex)) 1734 - return -EBUSY; 1621 + if (!mutex_trylock(&sbi->gc_mutex)) { 1622 + ret = -EBUSY; 1623 + goto out; 1624 + } 1735 1625 } else { 1736 1626 mutex_lock(&sbi->gc_mutex); 1737 1627 } 1738 1628 1739 - return f2fs_gc(sbi, sync); 1629 + ret = f2fs_gc(sbi, sync); 1630 + out: 1631 + mnt_drop_write_file(filp); 1632 + return ret; 1740 1633 } 1741 1634 1742 1635 static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) 1743 1636 { 1744 1637 struct inode *inode = file_inode(filp); 1745 1638 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1639 + int ret; 1746 1640 1747 1641 if (!capable(CAP_SYS_ADMIN)) 1748 1642 return -EPERM; ··· 1760 1634 if (f2fs_readonly(sbi->sb)) 1761 1635 return -EROFS; 1762 1636 1763 - return f2fs_sync_fs(sbi->sb, 1); 1637 + ret = mnt_want_write_file(filp); 1638 + if (ret) 1639 + return ret; 1640 + 1641 + ret = f2fs_sync_fs(sbi->sb, 1); 1642 + 1643 + mnt_drop_write_file(filp); 1644 + return ret; 1764 1645 } 1765 1646 1766 1647 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,

+5 -22

fs/f2fs/gc.c

··· 96 96 dev_t dev = sbi->sb->s_bdev->bd_dev; 97 97 int err = 0; 98 98 99 - gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 99 + gc_th = f2fs_kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 100 100 if (!gc_th) { 101 101 err = -ENOMEM; 102 102 goto out; ··· 465 465 continue; 466 466 } 467 467 468 - /* set page dirty and write it */ 469 - if (gc_type == FG_GC) { 470 - f2fs_wait_on_page_writeback(node_page, NODE, true); 471 - set_page_dirty(node_page); 472 - } else { 473 - if (!PageWriteback(node_page)) 474 - set_page_dirty(node_page); 475 - } 476 - f2fs_put_page(node_page, 1); 468 + move_node_page(node_page, gc_type); 477 469 stat_inc_node_blk_count(sbi, 1, gc_type); 478 470 } 479 471 ··· 826 834 f2fs_put_page(sum_page, 0); 827 835 } 828 836 829 - if (gc_type == FG_GC) { 830 - if (type == SUM_TYPE_NODE) { 831 - struct writeback_control wbc = { 832 - .sync_mode = WB_SYNC_ALL, 833 - .nr_to_write = LONG_MAX, 834 - .for_reclaim = 0, 835 - }; 836 - sync_node_pages(sbi, 0, &wbc); 837 - } else { 838 - f2fs_submit_merged_bio(sbi, DATA, WRITE); 839 - } 840 - } 837 + if (gc_type == FG_GC) 838 + f2fs_submit_merged_bio(sbi, 839 + (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); 841 840 842 841 blk_finish_plug(&plug); 843 842

+103 -8

fs/f2fs/inline.c

··· 161 161 if (!f2fs_has_inline_data(inode)) 162 162 return 0; 163 163 164 - page = grab_cache_page(inode->i_mapping, 0); 164 + page = f2fs_grab_cache_page(inode->i_mapping, 0, false); 165 165 if (!page) 166 166 return -ENOMEM; 167 167 ··· 303 303 else 304 304 f2fs_put_page(ipage, 0); 305 305 306 - /* 307 - * For the most part, it should be a bug when name_len is zero. 308 - * We stop here for figuring out where the bugs has occurred. 309 - */ 310 - f2fs_bug_on(sbi, d.max < 0); 311 306 return de; 312 307 } 313 308 ··· 350 355 * NOTE: ipage is grabbed by caller, but if any error occurs, we should 351 356 * release ipage in this function. 352 357 */ 353 - static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 358 + static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, 354 359 struct f2fs_inline_dentry *inline_dentry) 355 360 { 356 361 struct page *page; ··· 358 363 struct f2fs_dentry_block *dentry_blk; 359 364 int err; 360 365 361 - page = grab_cache_page(dir->i_mapping, 0); 366 + page = f2fs_grab_cache_page(dir->i_mapping, 0, false); 362 367 if (!page) { 363 368 f2fs_put_page(ipage, 1); 364 369 return -ENOMEM; ··· 400 405 stat_dec_inline_dir(dir); 401 406 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 402 407 408 + F2FS_I(dir)->i_current_depth = 1; 403 409 if (i_size_read(dir) < PAGE_SIZE) { 404 410 i_size_write(dir, PAGE_SIZE); 405 411 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); ··· 410 414 out: 411 415 f2fs_put_page(page, 1); 412 416 return err; 417 + } 418 + 419 + static int f2fs_add_inline_entries(struct inode *dir, 420 + struct f2fs_inline_dentry *inline_dentry) 421 + { 422 + struct f2fs_dentry_ptr d; 423 + unsigned long bit_pos = 0; 424 + int err = 0; 425 + 426 + make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); 427 + 428 + while (bit_pos < d.max) { 429 + struct f2fs_dir_entry *de; 430 + struct qstr new_name; 431 + nid_t ino; 432 + umode_t fake_mode; 433 + 434 + if (!test_bit_le(bit_pos, d.bitmap)) { 435 + 
bit_pos++; 436 + continue; 437 + } 438 + 439 + de = &d.dentry[bit_pos]; 440 + 441 + if (unlikely(!de->name_len)) { 442 + bit_pos++; 443 + continue; 444 + } 445 + 446 + new_name.name = d.filename[bit_pos]; 447 + new_name.len = de->name_len; 448 + 449 + ino = le32_to_cpu(de->ino); 450 + fake_mode = get_de_type(de) << S_SHIFT; 451 + 452 + err = f2fs_add_regular_entry(dir, &new_name, NULL, 453 + ino, fake_mode); 454 + if (err) 455 + goto punch_dentry_pages; 456 + 457 + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 458 + } 459 + return 0; 460 + punch_dentry_pages: 461 + truncate_inode_pages(&dir->i_data, 0); 462 + truncate_blocks(dir, 0, false); 463 + remove_dirty_inode(dir); 464 + return err; 465 + } 466 + 467 + static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, 468 + struct f2fs_inline_dentry *inline_dentry) 469 + { 470 + struct f2fs_inline_dentry *backup_dentry; 471 + struct f2fs_inode_info *fi = F2FS_I(dir); 472 + int err; 473 + 474 + backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry), 475 + GFP_F2FS_ZERO); 476 + if (!backup_dentry) { 477 + f2fs_put_page(ipage, 1); 478 + return -ENOMEM; 479 + } 480 + 481 + memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); 482 + truncate_inline_inode(ipage, 0); 483 + 484 + unlock_page(ipage); 485 + 486 + err = f2fs_add_inline_entries(dir, backup_dentry); 487 + if (err) 488 + goto recover; 489 + 490 + lock_page(ipage); 491 + 492 + stat_dec_inline_dir(dir); 493 + clear_inode_flag(fi, FI_INLINE_DENTRY); 494 + update_inode(dir, ipage); 495 + kfree(backup_dentry); 496 + return 0; 497 + recover: 498 + lock_page(ipage); 499 + memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); 500 + fi->i_current_depth = 0; 501 + i_size_write(dir, MAX_INLINE_DATA); 502 + update_inode(dir, ipage); 503 + f2fs_put_page(ipage, 1); 504 + 505 + kfree(backup_dentry); 506 + return err; 507 + } 508 + 509 + static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 510 + struct f2fs_inline_dentry 
*inline_dentry) 511 + { 512 + if (!F2FS_I(dir)->i_dir_level) 513 + return f2fs_move_inline_dirents(dir, ipage, inline_dentry); 514 + else 515 + return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); 413 516 } 414 517 415 518 int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,

+26 -40

fs/f2fs/inode.c

··· 283 283 cond_resched(); 284 284 goto retry; 285 285 } else if (err != -ENOENT) { 286 - f2fs_stop_checkpoint(sbi); 286 + f2fs_stop_checkpoint(sbi, false); 287 287 } 288 288 return 0; 289 289 } ··· 344 344 sb_start_intwrite(inode->i_sb); 345 345 set_inode_flag(fi, FI_NO_ALLOC); 346 346 i_size_write(inode, 0); 347 - 347 + retry: 348 348 if (F2FS_HAS_BLOCKS(inode)) 349 349 err = f2fs_truncate(inode, true); 350 350 ··· 352 352 f2fs_lock_op(sbi); 353 353 err = remove_inode_page(inode); 354 354 f2fs_unlock_op(sbi); 355 + } 356 + 357 + /* give more chances, if ENOMEM case */ 358 + if (err == -ENOMEM) { 359 + err = 0; 360 + goto retry; 355 361 } 356 362 357 363 sb_end_intwrite(inode->i_sb); ··· 374 368 if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) 375 369 add_ino_entry(sbi, inode->i_ino, UPDATE_INO); 376 370 if (is_inode_flag_set(fi, FI_FREE_NID)) { 377 - if (err && err != -ENOENT) 378 - alloc_nid_done(sbi, inode->i_ino); 379 - else 380 - alloc_nid_failed(sbi, inode->i_ino); 371 + alloc_nid_failed(sbi, inode->i_ino); 381 372 clear_inode_flag(fi, FI_FREE_NID); 382 373 } 383 - 384 - if (err && err != -ENOENT) { 385 - if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) { 386 - /* 387 - * get here because we failed to release resource 388 - * of inode previously, reminder our user to run fsck 389 - * for fixing. 
390 - */ 391 - set_sbi_flag(sbi, SBI_NEED_FSCK); 392 - f2fs_msg(sbi->sb, KERN_WARNING, 393 - "inode (ino:%lu) resource leak, run fsck " 394 - "to fix this issue!", inode->i_ino); 395 - } 396 - } 374 + f2fs_bug_on(sbi, err && 375 + !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); 397 376 out_clear: 398 377 fscrypt_put_encryption_info(inode, NULL); 399 378 clear_inode(inode); ··· 388 397 void handle_failed_inode(struct inode *inode) 389 398 { 390 399 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 391 - int err = 0; 400 + struct node_info ni; 392 401 393 - clear_nlink(inode); 394 - make_bad_inode(inode); 402 + /* don't make bad inode, since it becomes a regular file. */ 395 403 unlock_new_inode(inode); 396 404 397 - i_size_write(inode, 0); 398 - if (F2FS_HAS_BLOCKS(inode)) 399 - err = f2fs_truncate(inode, false); 400 - 401 - if (!err) 402 - err = remove_inode_page(inode); 403 - 404 405 /* 405 - * if we skip truncate_node in remove_inode_page bacause we failed 406 - * before, it's better to find another way to release resource of 407 - * this inode (e.g. valid block count, node block or nid). Here we 408 - * choose to add this inode to orphan list, so that we can call iput 409 - * for releasing in orphan recovery flow. 410 - * 411 406 * Note: we should add inode to orphan list before f2fs_unlock_op() 412 407 * so we can prevent losing this orphan when encoutering checkpoint 413 408 * and following suddenly power-off. 
414 409 */ 415 - if (err && err != -ENOENT) { 416 - err = acquire_orphan_inode(sbi); 417 - if (!err) 410 + get_node_info(sbi, inode->i_ino, &ni); 411 + 412 + if (ni.blk_addr != NULL_ADDR) { 413 + int err = acquire_orphan_inode(sbi); 414 + if (err) { 415 + set_sbi_flag(sbi, SBI_NEED_FSCK); 416 + f2fs_msg(sbi->sb, KERN_WARNING, 417 + "Too many orphan inodes, run fsck to fix."); 418 + } else { 418 419 add_orphan_inode(sbi, inode->i_ino); 420 + } 421 + alloc_nid_done(sbi, inode->i_ino); 422 + } else { 423 + set_inode_flag(F2FS_I(inode), FI_FREE_NID); 419 424 } 420 425 421 - set_inode_flag(F2FS_I(inode), FI_FREE_NID); 422 426 f2fs_unlock_op(sbi); 423 427 424 428 /* iput will drop the inode object */

+244 -72

fs/f2fs/node.c

··· 407 407 up_write(&nm_i->nat_tree_lock); 408 408 } 409 409 410 + /* 411 + * readahead MAX_RA_NODE number of node pages. 412 + */ 413 + static void ra_node_pages(struct page *parent, int start, int n) 414 + { 415 + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 416 + struct blk_plug plug; 417 + int i, end; 418 + nid_t nid; 419 + 420 + blk_start_plug(&plug); 421 + 422 + /* Then, try readahead for siblings of the desired node */ 423 + end = start + n; 424 + end = min(end, NIDS_PER_BLOCK); 425 + for (i = start; i < end; i++) { 426 + nid = get_nid(parent, i, false); 427 + ra_node_page(sbi, nid); 428 + } 429 + 430 + blk_finish_plug(&plug); 431 + } 432 + 410 433 pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) 411 434 { 412 435 const long direct_index = ADDRS_PER_INODE(dn->inode); ··· 730 707 return PTR_ERR(page); 731 708 } 732 709 710 + ra_node_pages(page, ofs, NIDS_PER_BLOCK); 711 + 733 712 rn = F2FS_NODE(page); 734 713 if (depth < 3) { 735 714 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { ··· 809 784 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 810 785 } 811 786 787 + ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); 788 + 812 789 /* free direct nodes linked to a partial indirect node */ 813 790 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 814 791 child_nid = get_nid(pages[idx], i, false); ··· 859 832 trace_f2fs_truncate_inode_blocks_enter(inode, from); 860 833 861 834 level = get_node_path(inode, from, offset, noffset); 862 - restart: 835 + 863 836 page = get_node_page(sbi, inode->i_ino); 864 837 if (IS_ERR(page)) { 865 838 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); ··· 923 896 if (offset[1] == 0 && 924 897 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { 925 898 lock_page(page); 926 - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 927 - f2fs_put_page(page, 1); 928 - goto restart; 929 - } 899 + BUG_ON(page->mapping != NODE_MAPPING(sbi)); 930 900 f2fs_wait_on_page_writeback(page, NODE, true); 931 901 
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 932 902 set_page_dirty(page); ··· 1022 998 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1023 999 return ERR_PTR(-EPERM); 1024 1000 1025 - page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 1001 + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); 1026 1002 if (!page) 1027 1003 return ERR_PTR(-ENOMEM); 1028 1004 ··· 1114 1090 if (apage) 1115 1091 return; 1116 1092 1117 - apage = grab_cache_page(NODE_MAPPING(sbi), nid); 1093 + apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); 1118 1094 if (!apage) 1119 1095 return; 1120 1096 1121 1097 err = read_node_page(apage, READA); 1122 1098 f2fs_put_page(apage, err ? 1 : 0); 1123 - } 1124 - 1125 - /* 1126 - * readahead MAX_RA_NODE number of node pages. 1127 - */ 1128 - static void ra_node_pages(struct page *parent, int start) 1129 - { 1130 - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); 1131 - struct blk_plug plug; 1132 - int i, end; 1133 - nid_t nid; 1134 - 1135 - blk_start_plug(&plug); 1136 - 1137 - /* Then, try readahead for siblings of the desired node */ 1138 - end = start + MAX_RA_NODE; 1139 - end = min(end, NIDS_PER_BLOCK); 1140 - for (i = start; i < end; i++) { 1141 - nid = get_nid(parent, i, false); 1142 - ra_node_page(sbi, nid); 1143 - } 1144 - 1145 - blk_finish_plug(&plug); 1146 1099 } 1147 1100 1148 1101 static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, ··· 1132 1131 return ERR_PTR(-ENOENT); 1133 1132 f2fs_bug_on(sbi, check_nid_range(sbi, nid)); 1134 1133 repeat: 1135 - page = grab_cache_page(NODE_MAPPING(sbi), nid); 1134 + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); 1136 1135 if (!page) 1137 1136 return ERR_PTR(-ENOMEM); 1138 1137 ··· 1145 1144 } 1146 1145 1147 1146 if (parent) 1148 - ra_node_pages(parent, start + 1); 1147 + ra_node_pages(parent, start + 1, MAX_RA_NODE); 1149 1148 1150 1149 lock_page(page); 1151 1150 ··· 1197 1196 { 1198 1197 struct inode *inode; 1199 1198 struct page 
*page; 1199 + int ret; 1200 1200 1201 1201 /* should flush inline_data before evict_inode */ 1202 1202 inode = ilookup(sbi->sb, ino); 1203 1203 if (!inode) 1204 1204 return; 1205 1205 1206 - page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0); 1206 + page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); 1207 1207 if (!page) 1208 1208 goto iput_out; 1209 - 1210 - if (!trylock_page(page)) 1211 - goto release_out; 1212 1209 1213 1210 if (!PageUptodate(page)) 1214 1211 goto page_out; ··· 1217 1218 if (!clear_page_dirty_for_io(page)) 1218 1219 goto page_out; 1219 1220 1220 - if (!f2fs_write_inline_data(inode, page)) 1221 - inode_dec_dirty_pages(inode); 1222 - else 1221 + ret = f2fs_write_inline_data(inode, page); 1222 + inode_dec_dirty_pages(inode); 1223 + if (ret) 1223 1224 set_page_dirty(page); 1224 1225 page_out: 1225 - unlock_page(page); 1226 - release_out: 1227 - f2fs_put_page(page, 0); 1226 + f2fs_put_page(page, 1); 1228 1227 iput_out: 1229 1228 iput(inode); 1230 1229 } 1231 1230 1232 - int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1233 - struct writeback_control *wbc) 1231 + void move_node_page(struct page *node_page, int gc_type) 1232 + { 1233 + if (gc_type == FG_GC) { 1234 + struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); 1235 + struct writeback_control wbc = { 1236 + .sync_mode = WB_SYNC_ALL, 1237 + .nr_to_write = 1, 1238 + .for_reclaim = 0, 1239 + }; 1240 + 1241 + set_page_dirty(node_page); 1242 + f2fs_wait_on_page_writeback(node_page, NODE, true); 1243 + 1244 + f2fs_bug_on(sbi, PageWriteback(node_page)); 1245 + if (!clear_page_dirty_for_io(node_page)) 1246 + goto out_page; 1247 + 1248 + if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) 1249 + unlock_page(node_page); 1250 + goto release_page; 1251 + } else { 1252 + /* set page dirty and write it */ 1253 + if (!PageWriteback(node_page)) 1254 + set_page_dirty(node_page); 1255 + } 1256 + out_page: 1257 + unlock_page(node_page); 1258 + release_page: 1259 + 
f2fs_put_page(node_page, 0); 1260 + } 1261 + 1262 + static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) 1234 1263 { 1235 1264 pgoff_t index, end; 1236 1265 struct pagevec pvec; 1237 - int step = ino ? 2 : 0; 1266 + struct page *last_page = NULL; 1267 + 1268 + pagevec_init(&pvec, 0); 1269 + index = 0; 1270 + end = ULONG_MAX; 1271 + 1272 + while (index <= end) { 1273 + int i, nr_pages; 1274 + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1275 + PAGECACHE_TAG_DIRTY, 1276 + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1277 + if (nr_pages == 0) 1278 + break; 1279 + 1280 + for (i = 0; i < nr_pages; i++) { 1281 + struct page *page = pvec.pages[i]; 1282 + 1283 + if (unlikely(f2fs_cp_error(sbi))) { 1284 + f2fs_put_page(last_page, 0); 1285 + pagevec_release(&pvec); 1286 + return ERR_PTR(-EIO); 1287 + } 1288 + 1289 + if (!IS_DNODE(page) || !is_cold_node(page)) 1290 + continue; 1291 + if (ino_of_node(page) != ino) 1292 + continue; 1293 + 1294 + lock_page(page); 1295 + 1296 + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1297 + continue_unlock: 1298 + unlock_page(page); 1299 + continue; 1300 + } 1301 + if (ino_of_node(page) != ino) 1302 + goto continue_unlock; 1303 + 1304 + if (!PageDirty(page)) { 1305 + /* someone wrote it for us */ 1306 + goto continue_unlock; 1307 + } 1308 + 1309 + if (last_page) 1310 + f2fs_put_page(last_page, 0); 1311 + 1312 + get_page(page); 1313 + last_page = page; 1314 + unlock_page(page); 1315 + } 1316 + pagevec_release(&pvec); 1317 + cond_resched(); 1318 + } 1319 + return last_page; 1320 + } 1321 + 1322 + int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1323 + struct writeback_control *wbc, bool atomic) 1324 + { 1325 + pgoff_t index, end; 1326 + struct pagevec pvec; 1327 + int ret = 0; 1328 + struct page *last_page = NULL; 1329 + bool marked = false; 1330 + 1331 + if (atomic) { 1332 + last_page = last_fsync_dnode(sbi, ino); 1333 + if (IS_ERR_OR_NULL(last_page)) 1334 + return 
PTR_ERR_OR_ZERO(last_page); 1335 + } 1336 + retry: 1337 + pagevec_init(&pvec, 0); 1338 + index = 0; 1339 + end = ULONG_MAX; 1340 + 1341 + while (index <= end) { 1342 + int i, nr_pages; 1343 + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, 1344 + PAGECACHE_TAG_DIRTY, 1345 + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1346 + if (nr_pages == 0) 1347 + break; 1348 + 1349 + for (i = 0; i < nr_pages; i++) { 1350 + struct page *page = pvec.pages[i]; 1351 + 1352 + if (unlikely(f2fs_cp_error(sbi))) { 1353 + f2fs_put_page(last_page, 0); 1354 + pagevec_release(&pvec); 1355 + return -EIO; 1356 + } 1357 + 1358 + if (!IS_DNODE(page) || !is_cold_node(page)) 1359 + continue; 1360 + if (ino_of_node(page) != ino) 1361 + continue; 1362 + 1363 + lock_page(page); 1364 + 1365 + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1366 + continue_unlock: 1367 + unlock_page(page); 1368 + continue; 1369 + } 1370 + if (ino_of_node(page) != ino) 1371 + goto continue_unlock; 1372 + 1373 + if (!PageDirty(page) && page != last_page) { 1374 + /* someone wrote it for us */ 1375 + goto continue_unlock; 1376 + } 1377 + 1378 + f2fs_wait_on_page_writeback(page, NODE, true); 1379 + BUG_ON(PageWriteback(page)); 1380 + 1381 + if (!atomic || page == last_page) { 1382 + set_fsync_mark(page, 1); 1383 + if (IS_INODE(page)) 1384 + set_dentry_mark(page, 1385 + need_dentry_mark(sbi, ino)); 1386 + /* may be written by other thread */ 1387 + if (!PageDirty(page)) 1388 + set_page_dirty(page); 1389 + } 1390 + 1391 + if (!clear_page_dirty_for_io(page)) 1392 + goto continue_unlock; 1393 + 1394 + ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1395 + if (ret) { 1396 + unlock_page(page); 1397 + f2fs_put_page(last_page, 0); 1398 + break; 1399 + } 1400 + if (page == last_page) { 1401 + f2fs_put_page(page, 0); 1402 + marked = true; 1403 + break; 1404 + } 1405 + } 1406 + pagevec_release(&pvec); 1407 + cond_resched(); 1408 + 1409 + if (ret || marked) 1410 + break; 1411 + } 1412 + if (!ret && 
atomic && !marked) { 1413 + f2fs_msg(sbi->sb, KERN_DEBUG, 1414 + "Retry to write fsync mark: ino=%u, idx=%lx", 1415 + ino, last_page->index); 1416 + lock_page(last_page); 1417 + set_page_dirty(last_page); 1418 + unlock_page(last_page); 1419 + goto retry; 1420 + } 1421 + return ret ? -EIO: 0; 1422 + } 1423 + 1424 + int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) 1425 + { 1426 + pgoff_t index, end; 1427 + struct pagevec pvec; 1428 + int step = 0; 1238 1429 int nwritten = 0; 1239 1430 1240 1431 pagevec_init(&pvec, 0); ··· 1463 1274 if (step == 2 && (!IS_DNODE(page) || 1464 1275 !is_cold_node(page))) 1465 1276 continue; 1466 - 1467 - /* 1468 - * If an fsync mode, 1469 - * we should not skip writing node pages. 1470 - */ 1471 1277 lock_node: 1472 - if (ino && ino_of_node(page) == ino) 1473 - lock_page(page); 1474 - else if (!trylock_page(page)) 1278 + if (!trylock_page(page)) 1475 1279 continue; 1476 1280 1477 1281 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { ··· 1472 1290 unlock_page(page); 1473 1291 continue; 1474 1292 } 1475 - if (ino && ino_of_node(page) != ino) 1476 - goto continue_unlock; 1477 1293 1478 1294 if (!PageDirty(page)) { 1479 1295 /* someone wrote it for us */ ··· 1479 1299 } 1480 1300 1481 1301 /* flush inline_data */ 1482 - if (!ino && is_inline_node(page)) { 1302 + if (is_inline_node(page)) { 1483 1303 clear_inline_node(page); 1484 1304 unlock_page(page); 1485 1305 flush_inline_data(sbi, ino_of_node(page)); ··· 1492 1312 if (!clear_page_dirty_for_io(page)) 1493 1313 goto continue_unlock; 1494 1314 1495 - /* called by fsync() */ 1496 - if (ino && IS_DNODE(page)) { 1497 - set_fsync_mark(page, 1); 1498 - if (IS_INODE(page)) 1499 - set_dentry_mark(page, 1500 - need_dentry_mark(sbi, ino)); 1501 - nwritten++; 1502 - } else { 1503 - set_fsync_mark(page, 0); 1504 - set_dentry_mark(page, 0); 1505 - } 1315 + set_fsync_mark(page, 0); 1316 + set_dentry_mark(page, 0); 1506 1317 1507 1318 if 
(NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1508 1319 unlock_page(page); ··· 1641 1470 1642 1471 diff = nr_pages_to_write(sbi, NODE, wbc); 1643 1472 wbc->sync_mode = WB_SYNC_NONE; 1644 - sync_node_pages(sbi, 0, wbc); 1473 + sync_node_pages(sbi, wbc); 1645 1474 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1646 1475 return 0; 1647 1476 ··· 1695 1524 struct f2fs_nm_info *nm_i = NM_I(sbi); 1696 1525 struct free_nid *i; 1697 1526 struct nat_entry *ne; 1698 - bool allocated = false; 1699 1527 1700 1528 if (!available_free_memory(sbi, FREE_NIDS)) 1701 1529 return -1; ··· 1708 1538 ne = __lookup_nat_cache(nm_i, nid); 1709 1539 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1710 1540 nat_get_blkaddr(ne) != NULL_ADDR)) 1711 - allocated = true; 1712 - if (allocated) 1713 1541 return 0; 1714 1542 } 1715 1543 ··· 1840 1672 struct f2fs_nm_info *nm_i = NM_I(sbi); 1841 1673 struct free_nid *i = NULL; 1842 1674 retry: 1675 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1676 + if (time_to_inject(FAULT_ALLOC_NID)) 1677 + return false; 1678 + #endif 1843 1679 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) 1844 1680 return false; 1845 1681 ··· 2018 1846 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2019 1847 return -EINVAL; 2020 1848 2021 - ipage = grab_cache_page(NODE_MAPPING(sbi), ino); 1849 + ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); 2022 1850 if (!ipage) 2023 1851 return -ENOMEM; 2024 1852

+87 -62

fs/f2fs/recovery.c

··· 49 49 50 50 bool space_for_roll_forward(struct f2fs_sb_info *sbi) 51 51 { 52 - if (sbi->last_valid_block_count + sbi->alloc_valid_block_count 53 - > sbi->user_block_count) 52 + s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); 53 + 54 + if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) 54 55 return false; 55 56 return true; 56 57 } ··· 68 67 return NULL; 69 68 } 70 69 71 - static int recover_dentry(struct inode *inode, struct page *ipage) 70 + static struct fsync_inode_entry *add_fsync_inode(struct list_head *head, 71 + struct inode *inode) 72 + { 73 + struct fsync_inode_entry *entry; 74 + 75 + entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 76 + if (!entry) 77 + return NULL; 78 + 79 + entry->inode = inode; 80 + list_add_tail(&entry->list, head); 81 + 82 + return entry; 83 + } 84 + 85 + static void del_fsync_inode(struct fsync_inode_entry *entry) 86 + { 87 + iput(entry->inode); 88 + list_del(&entry->list); 89 + kmem_cache_free(fsync_entry_slab, entry); 90 + } 91 + 92 + static int recover_dentry(struct inode *inode, struct page *ipage, 93 + struct list_head *dir_list) 72 94 { 73 95 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 74 96 nid_t pino = le32_to_cpu(raw_inode->i_pino); ··· 99 75 struct qstr name; 100 76 struct page *page; 101 77 struct inode *dir, *einode; 78 + struct fsync_inode_entry *entry; 102 79 int err = 0; 103 80 104 - dir = f2fs_iget(inode->i_sb, pino); 105 - if (IS_ERR(dir)) { 106 - err = PTR_ERR(dir); 107 - goto out; 81 + entry = get_fsync_inode(dir_list, pino); 82 + if (!entry) { 83 + dir = f2fs_iget(inode->i_sb, pino); 84 + if (IS_ERR(dir)) { 85 + err = PTR_ERR(dir); 86 + goto out; 87 + } 88 + 89 + entry = add_fsync_inode(dir_list, dir); 90 + if (!entry) { 91 + err = -ENOMEM; 92 + iput(dir); 93 + goto out; 94 + } 108 95 } 109 96 110 - if (file_enc_name(inode)) { 111 - iput(dir); 97 + dir = entry->inode; 98 + 99 + if (file_enc_name(inode)) 112 100 return 0; 113 - } 114 101 115 102 name.len 
= le32_to_cpu(raw_inode->i_namelen); 116 103 name.name = raw_inode->i_name; ··· 129 94 if (unlikely(name.len > F2FS_NAME_LEN)) { 130 95 WARN_ON(1); 131 96 err = -ENAMETOOLONG; 132 - goto out_err; 97 + goto out; 133 98 } 134 99 retry: 135 100 de = f2fs_find_entry(dir, &name, &page); ··· 155 120 goto retry; 156 121 } 157 122 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); 158 - if (err) 159 - goto out_err; 160 - 161 - if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { 162 - iput(dir); 163 - } else { 164 - add_dirty_dir_inode(dir); 165 - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); 166 - } 167 123 168 124 goto out; 169 125 170 126 out_unmap_put: 171 127 f2fs_dentry_kunmap(dir, page); 172 128 f2fs_put_page(page, 0); 173 - out_err: 174 - iput(dir); 175 129 out: 176 130 f2fs_msg(inode->i_sb, KERN_NOTICE, 177 131 "%s: ino = %x, name = %s, dir = %lx, err = %d", ··· 222 198 { 223 199 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 224 200 struct curseg_info *curseg; 201 + struct inode *inode; 225 202 struct page *page = NULL; 226 203 block_t blkaddr; 227 204 int err = 0; ··· 230 205 /* get node pages in the current segment */ 231 206 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 232 207 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 233 - 234 - ra_meta_pages(sbi, blkaddr, 1, META_POR, true); 235 208 236 209 while (1) { 237 210 struct fsync_inode_entry *entry; ··· 256 233 break; 257 234 } 258 235 259 - /* add this fsync inode to the list */ 260 - entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 261 - if (!entry) { 262 - err = -ENOMEM; 263 - break; 264 - } 265 236 /* 266 237 * CP | dnode(F) | inode(DF) 267 238 * For this case, we should not give up now. 
268 239 */ 269 - entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 270 - if (IS_ERR(entry->inode)) { 271 - err = PTR_ERR(entry->inode); 272 - kmem_cache_free(fsync_entry_slab, entry); 240 + inode = f2fs_iget(sbi->sb, ino_of_node(page)); 241 + if (IS_ERR(inode)) { 242 + err = PTR_ERR(inode); 273 243 if (err == -ENOENT) { 274 244 err = 0; 275 245 goto next; 276 246 } 277 247 break; 278 248 } 279 - list_add_tail(&entry->list, head); 249 + 250 + /* add this fsync inode to the list */ 251 + entry = add_fsync_inode(head, inode); 252 + if (!entry) { 253 + err = -ENOMEM; 254 + iput(inode); 255 + break; 256 + } 280 257 } 281 258 entry->blkaddr = blkaddr; 282 259 283 - if (IS_INODE(page)) { 284 - entry->last_inode = blkaddr; 285 - if (is_dent_dnode(page)) 286 - entry->last_dentry = blkaddr; 287 - } 260 + if (IS_INODE(page) && is_dent_dnode(page)) 261 + entry->last_dentry = blkaddr; 288 262 next: 289 263 /* check next segment */ 290 264 blkaddr = next_blkaddr_of_node(page); ··· 297 277 { 298 278 struct fsync_inode_entry *entry, *tmp; 299 279 300 - list_for_each_entry_safe(entry, tmp, head, list) { 301 - iput(entry->inode); 302 - list_del(&entry->list); 303 - kmem_cache_free(fsync_entry_slab, entry); 304 - } 280 + list_for_each_entry_safe(entry, tmp, head, list) 281 + del_fsync_inode(entry); 305 282 } 306 283 307 284 static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, ··· 461 444 */ 462 445 if (dest == NEW_ADDR) { 463 446 truncate_data_blocks_range(&dn, 1); 464 - err = reserve_new_block(&dn); 465 - f2fs_bug_on(sbi, err); 447 + reserve_new_block(&dn); 466 448 continue; 467 449 } 468 450 ··· 470 454 471 455 if (src == NULL_ADDR) { 472 456 err = reserve_new_block(&dn); 457 + #ifdef CONFIG_F2FS_FAULT_INJECTION 458 + while (err) 459 + err = reserve_new_block(&dn); 460 + #endif 473 461 /* We should not get -ENOSPC */ 474 462 f2fs_bug_on(sbi, err); 475 463 } ··· 506 486 return err; 507 487 } 508 488 509 - static int recover_data(struct f2fs_sb_info *sbi, struct 
list_head *head) 489 + static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, 490 + struct list_head *dir_list) 510 491 { 511 492 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 512 493 struct curseg_info *curseg; ··· 534 513 break; 535 514 } 536 515 537 - entry = get_fsync_inode(head, ino_of_node(page)); 516 + entry = get_fsync_inode(inode_list, ino_of_node(page)); 538 517 if (!entry) 539 518 goto next; 540 519 /* ··· 542 521 * In this case, we can lose the latest inode(x). 543 522 * So, call recover_inode for the inode update. 544 523 */ 545 - if (entry->last_inode == blkaddr) 524 + if (IS_INODE(page)) 546 525 recover_inode(entry->inode, page); 547 526 if (entry->last_dentry == blkaddr) { 548 - err = recover_dentry(entry->inode, page); 527 + err = recover_dentry(entry->inode, page, dir_list); 549 528 if (err) { 550 529 f2fs_put_page(page, 1); 551 530 break; ··· 557 536 break; 558 537 } 559 538 560 - if (entry->blkaddr == blkaddr) { 561 - iput(entry->inode); 562 - list_del(&entry->list); 563 - kmem_cache_free(fsync_entry_slab, entry); 564 - } 539 + if (entry->blkaddr == blkaddr) 540 + del_fsync_inode(entry); 565 541 next: 566 542 /* check next segment */ 567 543 blkaddr = next_blkaddr_of_node(page); ··· 569 551 return err; 570 552 } 571 553 572 - int recover_fsync_data(struct f2fs_sb_info *sbi) 554 + int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) 573 555 { 574 556 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 575 557 struct list_head inode_list; 558 + struct list_head dir_list; 576 559 block_t blkaddr; 577 560 int err; 561 + int ret = 0; 578 562 bool need_writecp = false; 579 563 580 564 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", ··· 585 565 return -ENOMEM; 586 566 587 567 INIT_LIST_HEAD(&inode_list); 568 + INIT_LIST_HEAD(&dir_list); 588 569 589 570 /* prevent checkpoint */ 590 571 mutex_lock(&sbi->cp_mutex); ··· 594 573 595 574 /* step #1: find fsynced inode numbers 
*/ 596 575 err = find_fsync_dnodes(sbi, &inode_list); 597 - if (err) 576 + if (err || list_empty(&inode_list)) 598 577 goto out; 599 578 600 - if (list_empty(&inode_list)) 579 + if (check_only) { 580 + ret = 1; 601 581 goto out; 582 + } 602 583 603 584 need_writecp = true; 604 585 605 586 /* step #2: recover data */ 606 - err = recover_data(sbi, &inode_list); 587 + err = recover_data(sbi, &inode_list, &dir_list); 607 588 if (!err) 608 589 f2fs_bug_on(sbi, !list_empty(&inode_list)); 609 590 out: 610 591 destroy_fsync_dnodes(&inode_list); 611 - kmem_cache_destroy(fsync_entry_slab); 612 592 613 593 /* truncate meta pages to be used by the recovery */ 614 594 truncate_inode_pages_range(META_MAPPING(sbi), ··· 647 625 } else { 648 626 mutex_unlock(&sbi->cp_mutex); 649 627 } 650 - return err; 628 + 629 + destroy_fsync_dnodes(&dir_list); 630 + kmem_cache_destroy(fsync_entry_slab); 631 + return ret ? ret: err; 651 632 }

+6 -2

fs/f2fs/segment.c

··· 223 223 f2fs_put_dnode(&dn); 224 224 } 225 225 next: 226 - ClearPageUptodate(page); 226 + /* we don't need to invalidate this in the successful status */ 227 + if (drop || recover) 228 + ClearPageUptodate(page); 227 229 set_page_private(page, 0); 228 - ClearPageUptodate(page); 230 + ClearPagePrivate(page); 229 231 f2fs_put_page(page, 1); 230 232 231 233 list_del(&cur->list); ··· 240 238 void drop_inmem_pages(struct inode *inode) 241 239 { 242 240 struct f2fs_inode_info *fi = F2FS_I(inode); 241 + 242 + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 243 243 244 244 mutex_lock(&fi->inmem_lock); 245 245 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);

+5 -4

fs/f2fs/segment.h

··· 158 158 }; 159 159 160 160 struct seg_entry { 161 - unsigned short valid_blocks; /* # of valid blocks */ 161 + unsigned int type:6; /* segment type like CURSEG_XXX_TYPE */ 162 + unsigned int valid_blocks:10; /* # of valid blocks */ 163 + unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */ 164 + unsigned int padding:6; /* padding */ 162 165 unsigned char *cur_valid_map; /* validity bitmap of blocks */ 163 166 /* 164 167 * # of valid blocks and the validity bitmap stored in the last 165 168 * checkpoint pack. This information is used by the SSR mode. 166 169 */ 167 - unsigned short ckpt_valid_blocks; 168 - unsigned char *ckpt_valid_map; 170 + unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */ 169 171 unsigned char *discard_map; 170 - unsigned char type; /* segment type like CURSEG_XXX_TYPE */ 171 172 unsigned long long mtime; /* modification time of the segment */ 172 173 }; 173 174

+246 -42

fs/f2fs/super.c

··· 39 39 static struct kmem_cache *f2fs_inode_cachep; 40 40 static struct kset *f2fs_kset; 41 41 42 + #ifdef CONFIG_F2FS_FAULT_INJECTION 43 + struct f2fs_fault_info f2fs_fault; 44 + 45 + char *fault_name[FAULT_MAX] = { 46 + [FAULT_KMALLOC] = "kmalloc", 47 + [FAULT_PAGE_ALLOC] = "page alloc", 48 + [FAULT_ALLOC_NID] = "alloc nid", 49 + [FAULT_ORPHAN] = "orphan", 50 + [FAULT_BLOCK] = "no more block", 51 + [FAULT_DIR_DEPTH] = "too big dir depth", 52 + }; 53 + 54 + static void f2fs_build_fault_attr(unsigned int rate) 55 + { 56 + if (rate) { 57 + atomic_set(&f2fs_fault.inject_ops, 0); 58 + f2fs_fault.inject_rate = rate; 59 + f2fs_fault.inject_type = (1 << FAULT_MAX) - 1; 60 + } else { 61 + memset(&f2fs_fault, 0, sizeof(struct f2fs_fault_info)); 62 + } 63 + } 64 + #endif 65 + 42 66 /* f2fs-wide shrinker description */ 43 67 static struct shrinker f2fs_shrinker_info = { 44 68 .scan_objects = f2fs_shrink_scan, ··· 92 68 Opt_noextent_cache, 93 69 Opt_noinline_data, 94 70 Opt_data_flush, 71 + Opt_fault_injection, 95 72 Opt_err, 96 73 }; 97 74 ··· 118 93 {Opt_noextent_cache, "noextent_cache"}, 119 94 {Opt_noinline_data, "noinline_data"}, 120 95 {Opt_data_flush, "data_flush"}, 96 + {Opt_fault_injection, "fault_injection=%u"}, 121 97 {Opt_err, NULL}, 122 98 }; 123 99 ··· 128 102 SM_INFO, /* struct f2fs_sm_info */ 129 103 NM_INFO, /* struct f2fs_nm_info */ 130 104 F2FS_SBI, /* struct f2fs_sb_info */ 105 + #ifdef CONFIG_F2FS_FAULT_INJECTION 106 + FAULT_INFO_RATE, /* struct f2fs_fault_info */ 107 + FAULT_INFO_TYPE, /* struct f2fs_fault_info */ 108 + #endif 131 109 }; 132 110 133 111 struct f2fs_attr { ··· 153 123 return (unsigned char *)NM_I(sbi); 154 124 else if (struct_type == F2FS_SBI) 155 125 return (unsigned char *)sbi; 126 + #ifdef CONFIG_F2FS_FAULT_INJECTION 127 + else if (struct_type == FAULT_INFO_RATE || 128 + struct_type == FAULT_INFO_TYPE) 129 + return (unsigned char *)&f2fs_fault; 130 + #endif 156 131 return NULL; 157 132 } 158 133 ··· 207 172 ret = 
kstrtoul(skip_spaces(buf), 0, &t); 208 173 if (ret < 0) 209 174 return ret; 175 + #ifdef CONFIG_F2FS_FAULT_INJECTION 176 + if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) 177 + return -EINVAL; 178 + #endif 210 179 *ui = t; 211 180 return count; 212 181 } ··· 276 237 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 277 238 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); 278 239 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); 240 + #ifdef CONFIG_F2FS_FAULT_INJECTION 241 + F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); 242 + F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); 243 + #endif 279 244 F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); 280 245 281 246 #define ATTR_LIST(name) (&f2fs_attr_##name.attr) ··· 316 273 .release = f2fs_sb_release, 317 274 }; 318 275 276 + #ifdef CONFIG_F2FS_FAULT_INJECTION 277 + /* sysfs for f2fs fault injection */ 278 + static struct kobject f2fs_fault_inject; 279 + 280 + static struct attribute *f2fs_fault_attrs[] = { 281 + ATTR_LIST(inject_rate), 282 + ATTR_LIST(inject_type), 283 + NULL 284 + }; 285 + 286 + static struct kobj_type f2fs_fault_ktype = { 287 + .default_attrs = f2fs_fault_attrs, 288 + .sysfs_ops = &f2fs_attr_ops, 289 + }; 290 + #endif 291 + 319 292 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 
320 293 { 321 294 struct va_format vaf; ··· 358 299 substring_t args[MAX_OPT_ARGS]; 359 300 char *p, *name; 360 301 int arg = 0; 302 + 303 + #ifdef CONFIG_F2FS_FAULT_INJECTION 304 + f2fs_build_fault_attr(0); 305 + #endif 361 306 362 307 if (!options) 363 308 return 0; ··· 496 433 case Opt_data_flush: 497 434 set_opt(sbi, DATA_FLUSH); 498 435 break; 436 + case Opt_fault_injection: 437 + if (args->from && match_int(args, &arg)) 438 + return -EINVAL; 439 + #ifdef CONFIG_F2FS_FAULT_INJECTION 440 + f2fs_build_fault_attr(arg); 441 + #else 442 + f2fs_msg(sb, KERN_INFO, 443 + "FAULT_INJECTION was not selected"); 444 + #endif 445 + break; 499 446 default: 500 447 f2fs_msg(sb, KERN_ERR, 501 448 "Unrecognized mount option \"%s\" or missing value", ··· 526 453 527 454 init_once((void *) fi); 528 455 456 + if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) { 457 + kmem_cache_free(f2fs_inode_cachep, fi); 458 + return NULL; 459 + } 460 + 529 461 /* Initialize f2fs-specific inode info */ 530 462 fi->vfs_inode.i_version = 1; 531 - atomic_set(&fi->dirty_pages, 0); 532 463 fi->i_current_depth = 1; 533 464 fi->i_advise = 0; 534 465 init_rwsem(&fi->i_sem); ··· 607 530 608 531 static void f2fs_destroy_inode(struct inode *inode) 609 532 { 533 + percpu_counter_destroy(&F2FS_I(inode)->dirty_pages); 610 534 call_rcu(&inode->i_rcu, f2fs_i_callback); 535 + } 536 + 537 + static void destroy_percpu_info(struct f2fs_sb_info *sbi) 538 + { 539 + int i; 540 + 541 + for (i = 0; i < NR_COUNT_TYPE; i++) 542 + percpu_counter_destroy(&sbi->nr_pages[i]); 543 + percpu_counter_destroy(&sbi->alloc_valid_block_count); 544 + percpu_counter_destroy(&sbi->total_valid_inode_count); 611 545 } 612 546 613 547 static void f2fs_put_super(struct super_block *sb) ··· 627 539 628 540 if (sbi->s_proc) { 629 541 remove_proc_entry("segment_info", sbi->s_proc); 542 + remove_proc_entry("segment_bits", sbi->s_proc); 630 543 remove_proc_entry(sb->s_id, f2fs_proc_root); 631 544 } 632 545 kobject_del(&sbi->s_kobj); ··· 657 
568 * normally superblock is clean, so we need to release this. 658 569 * In addition, EIO will skip do checkpoint, we need this as well. 659 570 */ 660 - release_ino_entry(sbi); 571 + release_ino_entry(sbi, true); 661 572 release_discard_addrs(sbi); 662 573 663 574 f2fs_leave_shrinker(sbi); 664 575 mutex_unlock(&sbi->umount_mutex); 665 576 666 577 /* our cp_error case, we can wait for any writeback page */ 667 - if (get_pages(sbi, F2FS_WRITEBACK)) 668 - f2fs_flush_merged_bios(sbi); 578 + f2fs_flush_merged_bios(sbi); 669 579 670 580 iput(sbi->node_inode); 671 581 iput(sbi->meta_inode); ··· 681 593 if (sbi->s_chksum_driver) 682 594 crypto_free_shash(sbi->s_chksum_driver); 683 595 kfree(sbi->raw_super); 596 + 597 + destroy_percpu_info(sbi); 684 598 kfree(sbi); 685 599 } 686 600 ··· 835 745 return 0; 836 746 } 837 747 838 - static int segment_info_open_fs(struct inode *inode, struct file *file) 748 + static int segment_bits_seq_show(struct seq_file *seq, void *offset) 839 749 { 840 - return single_open(file, segment_info_seq_show, PDE_DATA(inode)); 750 + struct super_block *sb = seq->private; 751 + struct f2fs_sb_info *sbi = F2FS_SB(sb); 752 + unsigned int total_segs = 753 + le32_to_cpu(sbi->raw_super->segment_count_main); 754 + int i, j; 755 + 756 + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" 757 + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); 758 + 759 + for (i = 0; i < total_segs; i++) { 760 + struct seg_entry *se = get_seg_entry(sbi, i); 761 + 762 + seq_printf(seq, "%-10d", i); 763 + seq_printf(seq, "%d|%-3u|", se->type, 764 + get_valid_blocks(sbi, i, 1)); 765 + for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) 766 + seq_printf(seq, "%x ", se->cur_valid_map[j]); 767 + seq_putc(seq, '\n'); 768 + } 769 + return 0; 841 770 } 842 771 843 - static const struct file_operations f2fs_seq_segment_info_fops = { 844 - .owner = THIS_MODULE, 845 - .open = segment_info_open_fs, 846 - .read = seq_read, 847 - .llseek = seq_lseek, 848 - .release = single_release, 
772 + #define F2FS_PROC_FILE_DEF(_name) \ 773 + static int _name##_open_fs(struct inode *inode, struct file *file) \ 774 + { \ 775 + return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ 776 + } \ 777 + \ 778 + static const struct file_operations f2fs_seq_##_name##_fops = { \ 779 + .owner = THIS_MODULE, \ 780 + .open = _name##_open_fs, \ 781 + .read = seq_read, \ 782 + .llseek = seq_lseek, \ 783 + .release = single_release, \ 849 784 }; 785 + 786 + F2FS_PROC_FILE_DEF(segment_info); 787 + F2FS_PROC_FILE_DEF(segment_bits); 850 788 851 789 static void default_options(struct f2fs_sb_info *sbi) 852 790 { ··· 909 791 org_mount_opt = sbi->mount_opt; 910 792 active_logs = sbi->active_logs; 911 793 912 - if (*flags & MS_RDONLY) { 913 - set_opt(sbi, FASTBOOT); 914 - set_sbi_flag(sbi, SBI_IS_DIRTY); 794 + /* recover superblocks we couldn't write due to previous RO mount */ 795 + if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { 796 + err = f2fs_commit_super(sbi, false); 797 + f2fs_msg(sb, KERN_INFO, 798 + "Try to recover all the superblocks, ret: %d", err); 799 + if (!err) 800 + clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); 915 801 } 916 - 917 - sync_filesystem(sb); 918 802 919 803 sbi->mount_opt.opt = 0; 920 804 default_options(sbi); ··· 949 829 if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { 950 830 if (sbi->gc_thread) { 951 831 stop_gc_thread(sbi); 952 - f2fs_sync_fs(sb, 1); 953 832 need_restart_gc = true; 954 833 } 955 834 } else if (!sbi->gc_thread) { ··· 956 837 if (err) 957 838 goto restore_opts; 958 839 need_stop_gc = true; 840 + } 841 + 842 + if (*flags & MS_RDONLY) { 843 + writeback_inodes_sb(sb, WB_REASON_SYNC); 844 + sync_inodes_sb(sb); 845 + 846 + set_sbi_flag(sbi, SBI_IS_DIRTY); 847 + set_sbi_flag(sbi, SBI_IS_CLOSE); 848 + f2fs_sync_fs(sb, 1); 849 + clear_sbi_flag(sbi, SBI_IS_CLOSE); 959 850 } 960 851 961 852 /* ··· 981 852 } 982 853 skip: 983 854 /* Update the POSIXACL Flag */ 984 - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 855 
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 985 856 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); 857 + 986 858 return 0; 987 859 restore_gc: 988 860 if (need_restart_gc) { ··· 1023 893 ctx, len, NULL); 1024 894 } 1025 895 896 + static int f2fs_key_prefix(struct inode *inode, u8 **key) 897 + { 898 + *key = F2FS_I_SB(inode)->key_prefix; 899 + return F2FS_I_SB(inode)->key_prefix_size; 900 + } 901 + 1026 902 static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, 1027 903 void *fs_data) 1028 904 { ··· 1045 909 1046 910 static struct fscrypt_operations f2fs_cryptops = { 1047 911 .get_context = f2fs_get_context, 912 + .key_prefix = f2fs_key_prefix, 1048 913 .set_context = f2fs_set_context, 1049 914 .is_encrypted = f2fs_encrypted_inode, 1050 915 .empty_dir = f2fs_empty_dir, ··· 1135 998 return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); 1136 999 } 1137 1000 1138 - static inline bool sanity_check_area_boundary(struct super_block *sb, 1001 + static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, 1139 1002 struct buffer_head *bh) 1140 1003 { 1141 1004 struct f2fs_super_block *raw_super = (struct f2fs_super_block *) 1142 1005 (bh->b_data + F2FS_SUPER_OFFSET); 1006 + struct super_block *sb = sbi->sb; 1143 1007 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 1144 1008 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); 1145 1009 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr); ··· 1219 1081 segment0_blkaddr) >> log_blocks_per_seg); 1220 1082 1221 1083 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) { 1084 + set_sbi_flag(sbi, SBI_NEED_SB_WRITE); 1222 1085 res = "internally"; 1223 1086 } else { 1224 1087 err = __f2fs_commit_super(bh, NULL); ··· 1237 1098 return false; 1238 1099 } 1239 1100 1240 - static int sanity_check_raw_super(struct super_block *sb, 1101 + static int sanity_check_raw_super(struct f2fs_sb_info *sbi, 1241 1102 struct buffer_head *bh) 1242 1103 { 1243 1104 struct f2fs_super_block *raw_super = 
(struct f2fs_super_block *) 1244 1105 (bh->b_data + F2FS_SUPER_OFFSET); 1106 + struct super_block *sb = sbi->sb; 1245 1107 unsigned int blocksize; 1246 1108 1247 1109 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { ··· 1309 1169 } 1310 1170 1311 1171 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ 1312 - if (sanity_check_area_boundary(sb, bh)) 1172 + if (sanity_check_area_boundary(sbi, bh)) 1313 1173 return 1; 1314 1174 1315 1175 return 0; ··· 1341 1201 static void init_sb_info(struct f2fs_sb_info *sbi) 1342 1202 { 1343 1203 struct f2fs_super_block *raw_super = sbi->raw_super; 1344 - int i; 1345 1204 1346 1205 sbi->log_sectors_per_block = 1347 1206 le32_to_cpu(raw_super->log_sectors_per_block); ··· 1360 1221 sbi->cur_victim_sec = NULL_SECNO; 1361 1222 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; 1362 1223 1363 - for (i = 0; i < NR_COUNT_TYPE; i++) 1364 - atomic_set(&sbi->nr_pages[i], 0); 1365 - 1366 1224 sbi->dir_level = DEF_DIR_LEVEL; 1367 1225 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; 1368 1226 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; ··· 1367 1231 1368 1232 INIT_LIST_HEAD(&sbi->s_list); 1369 1233 mutex_init(&sbi->umount_mutex); 1234 + 1235 + #ifdef CONFIG_F2FS_FS_ENCRYPTION 1236 + memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, 1237 + F2FS_KEY_DESC_PREFIX_SIZE); 1238 + sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE; 1239 + #endif 1240 + } 1241 + 1242 + static int init_percpu_info(struct f2fs_sb_info *sbi) 1243 + { 1244 + int i, err; 1245 + 1246 + for (i = 0; i < NR_COUNT_TYPE; i++) { 1247 + err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); 1248 + if (err) 1249 + return err; 1250 + } 1251 + 1252 + err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL); 1253 + if (err) 1254 + return err; 1255 + 1256 + return percpu_counter_init(&sbi->total_valid_inode_count, 0, 1257 + GFP_KERNEL); 1370 1258 } 1371 1259 1372 1260 /* ··· 1399 1239 * to get the first valid one. 
If any one of them is broken, we pass 1400 1240 * them recovery flag back to the caller. 1401 1241 */ 1402 - static int read_raw_super_block(struct super_block *sb, 1242 + static int read_raw_super_block(struct f2fs_sb_info *sbi, 1403 1243 struct f2fs_super_block **raw_super, 1404 1244 int *valid_super_block, int *recovery) 1405 1245 { 1246 + struct super_block *sb = sbi->sb; 1406 1247 int block; 1407 1248 struct buffer_head *bh; 1408 1249 struct f2fs_super_block *super; ··· 1423 1262 } 1424 1263 1425 1264 /* sanity checking of raw super */ 1426 - if (sanity_check_raw_super(sb, bh)) { 1265 + if (sanity_check_raw_super(sbi, bh)) { 1427 1266 f2fs_msg(sb, KERN_ERR, 1428 1267 "Can't find valid F2FS filesystem in %dth superblock", 1429 1268 block + 1); ··· 1459 1298 struct buffer_head *bh; 1460 1299 int err; 1461 1300 1301 + if ((recover && f2fs_readonly(sbi->sb)) || 1302 + bdev_read_only(sbi->sb->s_bdev)) { 1303 + set_sbi_flag(sbi, SBI_NEED_SB_WRITE); 1304 + return -EROFS; 1305 + } 1306 + 1462 1307 /* write back-up superblock first */ 1463 1308 bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 
0: 1); 1464 1309 if (!bh) ··· 1490 1323 struct f2fs_sb_info *sbi; 1491 1324 struct f2fs_super_block *raw_super; 1492 1325 struct inode *root; 1493 - long err; 1326 + int err; 1494 1327 bool retry = true, need_fsck = false; 1495 1328 char *options = NULL; 1496 1329 int recovery, i, valid_super_block; ··· 1507 1340 if (!sbi) 1508 1341 return -ENOMEM; 1509 1342 1343 + sbi->sb = sb; 1344 + 1510 1345 /* Load the checksum driver */ 1511 1346 sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); 1512 1347 if (IS_ERR(sbi->s_chksum_driver)) { ··· 1524 1355 goto free_sbi; 1525 1356 } 1526 1357 1527 - err = read_raw_super_block(sb, &raw_super, &valid_super_block, 1358 + err = read_raw_super_block(sbi, &raw_super, &valid_super_block, 1528 1359 &recovery); 1529 1360 if (err) 1530 1361 goto free_sbi; ··· 1559 1390 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); 1560 1391 1561 1392 /* init f2fs-specific super block info */ 1562 - sbi->sb = sb; 1563 1393 sbi->raw_super = raw_super; 1564 1394 sbi->valid_super_block = valid_super_block; 1565 1395 mutex_init(&sbi->gc_mutex); ··· 1583 1415 init_waitqueue_head(&sbi->cp_wait); 1584 1416 init_sb_info(sbi); 1585 1417 1418 + err = init_percpu_info(sbi); 1419 + if (err) 1420 + goto free_options; 1421 + 1586 1422 /* get an inode for meta space */ 1587 1423 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 1588 1424 if (IS_ERR(sbi->meta_inode)) { ··· 1603 1431 1604 1432 sbi->total_valid_node_count = 1605 1433 le32_to_cpu(sbi->ckpt->valid_node_count); 1606 - sbi->total_valid_inode_count = 1607 - le32_to_cpu(sbi->ckpt->valid_inode_count); 1434 + percpu_counter_set(&sbi->total_valid_inode_count, 1435 + le32_to_cpu(sbi->ckpt->valid_inode_count)); 1608 1436 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); 1609 1437 sbi->total_valid_block_count = 1610 1438 le64_to_cpu(sbi->ckpt->valid_block_count); 1611 1439 sbi->last_valid_block_count = sbi->total_valid_block_count; 1612 - sbi->alloc_valid_block_count = 0; 
1440 + 1613 1441 for (i = 0; i < NR_INODE_TYPE; i++) { 1614 1442 INIT_LIST_HEAD(&sbi->inode_list[i]); 1615 1443 spin_lock_init(&sbi->inode_lock[i]); ··· 1687 1515 if (f2fs_proc_root) 1688 1516 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1689 1517 1690 - if (sbi->s_proc) 1518 + if (sbi->s_proc) { 1691 1519 proc_create_data("segment_info", S_IRUGO, sbi->s_proc, 1692 1520 &f2fs_seq_segment_info_fops, sb); 1521 + proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, 1522 + &f2fs_seq_segment_bits_fops, sb); 1523 + } 1693 1524 1694 1525 sbi->s_kobj.kset = f2fs_kset; 1695 1526 init_completion(&sbi->s_kobj_unregister); ··· 1716 1541 if (need_fsck) 1717 1542 set_sbi_flag(sbi, SBI_NEED_FSCK); 1718 1543 1719 - err = recover_fsync_data(sbi); 1720 - if (err) { 1544 + err = recover_fsync_data(sbi, false); 1545 + if (err < 0) { 1721 1546 need_fsck = true; 1722 1547 f2fs_msg(sb, KERN_ERR, 1723 - "Cannot recover all fsync data errno=%ld", err); 1548 + "Cannot recover all fsync data errno=%d", err); 1549 + goto free_kobj; 1550 + } 1551 + } else { 1552 + err = recover_fsync_data(sbi, true); 1553 + 1554 + if (!f2fs_readonly(sb) && err > 0) { 1555 + err = -EINVAL; 1556 + f2fs_msg(sb, KERN_ERR, 1557 + "Need to recover fsync data"); 1724 1558 goto free_kobj; 1725 1559 } 1726 1560 } 1561 + 1727 1562 /* recover_fsync_data() cleared this already */ 1728 1563 clear_sbi_flag(sbi, SBI_POR_DOING); 1729 1564 ··· 1750 1565 kfree(options); 1751 1566 1752 1567 /* recover broken superblock */ 1753 - if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { 1568 + if (recovery) { 1754 1569 err = f2fs_commit_super(sbi, true); 1755 1570 f2fs_msg(sb, KERN_INFO, 1756 - "Try to recover %dth superblock, ret: %ld", 1571 + "Try to recover %dth superblock, ret: %d", 1757 1572 sbi->valid_super_block ? 
1 : 2, err); 1758 1573 } 1759 1574 ··· 1768 1583 free_proc: 1769 1584 if (sbi->s_proc) { 1770 1585 remove_proc_entry("segment_info", sbi->s_proc); 1586 + remove_proc_entry("segment_bits", sbi->s_proc); 1771 1587 remove_proc_entry(sb->s_id, f2fs_proc_root); 1772 1588 } 1773 1589 f2fs_destroy_stats(sbi); ··· 1789 1603 make_bad_inode(sbi->meta_inode); 1790 1604 iput(sbi->meta_inode); 1791 1605 free_options: 1606 + destroy_percpu_info(sbi); 1792 1607 kfree(options); 1793 1608 free_sb_buf: 1794 1609 kfree(raw_super); ··· 1875 1688 err = -ENOMEM; 1876 1689 goto free_extent_cache; 1877 1690 } 1691 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1692 + f2fs_fault_inject.kset = f2fs_kset; 1693 + f2fs_build_fault_attr(0); 1694 + err = kobject_init_and_add(&f2fs_fault_inject, &f2fs_fault_ktype, 1695 + NULL, "fault_injection"); 1696 + if (err) { 1697 + f2fs_fault_inject.kset = NULL; 1698 + goto free_kset; 1699 + } 1700 + #endif 1878 1701 err = register_shrinker(&f2fs_shrinker_info); 1879 1702 if (err) 1880 1703 goto free_kset; ··· 1903 1706 free_shrinker: 1904 1707 unregister_shrinker(&f2fs_shrinker_info); 1905 1708 free_kset: 1709 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1710 + if (f2fs_fault_inject.kset) 1711 + kobject_put(&f2fs_fault_inject); 1712 + #endif 1906 1713 kset_unregister(f2fs_kset); 1907 1714 free_extent_cache: 1908 1715 destroy_extent_cache(); ··· 1926 1725 { 1927 1726 remove_proc_entry("fs/f2fs", NULL); 1928 1727 f2fs_destroy_root_stats(); 1929 - unregister_shrinker(&f2fs_shrinker_info); 1930 1728 unregister_filesystem(&f2fs_fs_type); 1729 + unregister_shrinker(&f2fs_shrinker_info); 1730 + #ifdef CONFIG_F2FS_FAULT_INJECTION 1731 + kobject_put(&f2fs_fault_inject); 1732 + #endif 1733 + kset_unregister(f2fs_kset); 1931 1734 destroy_extent_cache(); 1932 1735 destroy_checkpoint_caches(); 1933 1736 destroy_segment_manager_caches(); 1934 1737 destroy_node_manager_caches(); 1935 1738 destroy_inodecache(); 1936 - kset_unregister(f2fs_kset); 1937 1739 f2fs_destroy_trace_ios(); 1938 
1740 } 1939 1741

+1 -2

fs/f2fs/xattr.c

··· 498 498 free = free + ENTRY_SIZE(here); 499 499 500 500 if (unlikely(free < newsize)) { 501 - error = -ENOSPC; 501 + error = -E2BIG; 502 502 goto exit; 503 503 } 504 504 } ··· 526 526 * Before we come here, old entry is removed. 527 527 * We just write new entry. 528 528 */ 529 - memset(last, 0, newsize); 530 529 last->e_name_index = index; 531 530 last->e_name_len = len; 532 531 memcpy(last->e_name, name, len);

+2

include/linux/f2fs_fs.h

··· 508 508 F2FS_FT_MAX 509 509 }; 510 510 511 + #define S_SHIFT 12 512 + 511 513 #endif /* _LINUX_F2FS_FS_H */

+1

include/linux/fscrypto.h

··· 175 175 */ 176 176 struct fscrypt_operations { 177 177 int (*get_context)(struct inode *, void *, size_t); 178 + int (*key_prefix)(struct inode *, u8 **); 178 179 int (*prepare_context)(struct inode *); 179 180 int (*set_context)(struct inode *, const void *, size_t, void *); 180 181 int (*dummy_context)(struct inode *);

+14 -10

include/trace/events/f2fs.h

··· 694 694 __entry->ret) 695 695 ); 696 696 697 - TRACE_EVENT(f2fs_reserve_new_block, 697 + TRACE_EVENT(f2fs_reserve_new_blocks, 698 698 699 - TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), 699 + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node, 700 + blkcnt_t count), 700 701 701 - TP_ARGS(inode, nid, ofs_in_node), 702 + TP_ARGS(inode, nid, ofs_in_node, count), 702 703 703 704 TP_STRUCT__entry( 704 705 __field(dev_t, dev) 705 706 __field(nid_t, nid) 706 707 __field(unsigned int, ofs_in_node) 708 + __field(blkcnt_t, count) 707 709 ), 708 710 709 711 TP_fast_assign( 710 712 __entry->dev = inode->i_sb->s_dev; 711 713 __entry->nid = nid; 712 714 __entry->ofs_in_node = ofs_in_node; 715 + __entry->count = count; 713 716 ), 714 717 715 - TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", 718 + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu", 716 719 show_dev(__entry), 717 720 (unsigned int)__entry->nid, 718 - __entry->ofs_in_node) 721 + __entry->ofs_in_node, 722 + (unsigned long long)__entry->count) 719 723 ); 720 724 721 725 DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ··· 1275 1271 1276 1272 DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, 1277 1273 1278 - TP_PROTO(struct super_block *sb, int type, int count), 1274 + TP_PROTO(struct super_block *sb, int type, s64 count), 1279 1275 1280 1276 TP_ARGS(sb, type, count), 1281 1277 1282 1278 TP_STRUCT__entry( 1283 1279 __field(dev_t, dev) 1284 1280 __field(int, type) 1285 - __field(int, count) 1281 + __field(s64, count) 1286 1282 ), 1287 1283 1288 1284 TP_fast_assign( ··· 1291 1287 __entry->count = count; 1292 1288 ), 1293 1289 1294 - TP_printk("dev = (%d,%d), %s, dirty count = %d", 1290 + TP_printk("dev = (%d,%d), %s, dirty count = %lld", 1295 1291 show_dev(__entry), 1296 1292 show_file_type(__entry->type), 1297 1293 __entry->count) ··· 1299 1295 1300 1296 DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter, 1301 1297 1302 - TP_PROTO(struct super_block 
*sb, int type, int count), 1298 + TP_PROTO(struct super_block *sb, int type, s64 count), 1303 1299 1304 1300 TP_ARGS(sb, type, count) 1305 1301 ); 1306 1302 1307 1303 DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, 1308 1304 1309 - TP_PROTO(struct super_block *sb, int type, int count), 1305 + TP_PROTO(struct super_block *sb, int type, s64 count), 1310 1306 1311 1307 TP_ARGS(sb, type, count) 1312 1308 );