Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-f2fs-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
"In this round, a couple of sysfs entries were introduced to tune
f2fs at runtime.

In addition, f2fs starts to support inline_data and improves the
read/write performance in some workloads by refactoring bio-related
flows.

This patch-set includes the following major enhancement patches.
- support inline_data
- refactor bio operations such as merge operations and rw type
assignment
- enhance the direct IO path
- enhance bio operations
- truncate a node page when it becomes obsolete
 - add sysfs entries: small_discards, max_victim_search, and
   in-place-update

The other bug fixes are as follows.
- fix a bug in truncate_partial_nodes
- avoid warnings during sparse and build process
- fix error handling flows
- fix potential bit overflows

And, there are a bunch of cleanups"

* tag 'for-f2fs-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (95 commits)
f2fs: drop obsolete node page when it is truncated
f2fs: introduce NODE_MAPPING for code consistency
f2fs: remove the orphan block page array
f2fs: add help function META_MAPPING
f2fs: move a branch for code redability
f2fs: call mark_inode_dirty to flush dirty pages
f2fs: clean checkpatch warnings
f2fs: missing REQ_META and REQ_PRIO when sync_meta_pages(META_FLUSH)
f2fs: avoid f2fs_balance_fs call during pageout
f2fs: add delimiter to seperate name and value in debug phrase
f2fs: use spinlock rather than mutex for better speed
f2fs: move alloc new orphan node out of lock protection region
f2fs: move grabing orphan pages out of protection region
f2fs: remove the needless parameter of f2fs_wait_on_page_writeback
f2fs: update documents and a MAINTAINERS entry
f2fs: add a sysfs entry to control max_victim_search
f2fs: improve write performance under frequent fsync calls
f2fs: avoid to read inline data except first page
f2fs: avoid to left uninitialized data in page when read inline data
f2fs: fix truncate_partial_nodes bug
...

+1887 -828
+31
Documentation/ABI/testing/sysfs-fs-f2fs
···
 Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
 Description:
	 Controls the victim selection policy for garbage collection.
+
+What:		/sys/fs/f2fs/<disk>/reclaim_segments
+Date:		October 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+	 Controls the issue rate of segment discard commands.
+
+What:		/sys/fs/f2fs/<disk>/ipu_policy
+Date:		November 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+	 Controls the in-place-update policy.
+
+What:		/sys/fs/f2fs/<disk>/min_ipu_util
+Date:		November 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+	 Controls the FS utilization condition for the in-place-update
+	 policies.
+
+What:		/sys/fs/f2fs/<disk>/max_small_discards
+Date:		November 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+	 Controls the issue rate of small discard commands.
+
+What:		/sys/fs/f2fs/<disk>/max_victim_search
+Date:		January 2014
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+	 Controls the number of trials to find a victim segment.
+24
Documentation/filesystems/f2fs.txt
···
 disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
                        does not aware of cold files such as media files.
 inline_xattr           Enable the inline xattrs feature.
+inline_data            Enable the inline data feature: New created small(<~3.4k)
+                       files can be written into inode block.
 
 ================================================================================
 DEBUGFS ENTRIES
···
                        segments is larger than this number, f2fs tries to
                        conduct checkpoint to reclaim the prefree segments
                        to free segments. By default, 100 segments, 200MB.
+
+max_small_discards     This parameter controls the number of discard
+                       commands that consist small blocks less than 2MB.
+                       The candidates to be discarded are cached until
+                       checkpoint is triggered, and issued during the
+                       checkpoint. By default, it is disabled with 0.
+
+ipu_policy             This parameter controls the policy of in-place
+                       updates in f2fs. There are five policies:
+                       0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR,
+                       2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL,
+                       4: F2FS_IPU_DISABLE.
+
+min_ipu_util           This parameter controls the threshold to trigger
+                       in-place-updates. The number indicates percentage
+                       of the filesystem utilization, and used by
+                       F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
+
+max_victim_search      This parameter controls the number of trials to
+                       find a victim segment when conducting SSR and
+                       cleaning operations. The default value is 4096
+                       which covers 8GB block address range.
 
 ================================================================================
 USAGE
+1
MAINTAINERS
···
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
 S:	Maintained
 F:	Documentation/filesystems/f2fs.txt
+F:	Documentation/ABI/testing/sysfs-fs-f2fs
 F:	fs/f2fs/
 F:	include/linux/f2fs_fs.h
+1 -1
fs/f2fs/Makefile
···
 obj-$(CONFIG_F2FS_FS) += f2fs.o
 
-f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
 f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
+108 -89
fs/f2fs/checkpoint.c
···
  */
 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 {
-    struct address_space *mapping = sbi->meta_inode->i_mapping;
+    struct address_space *mapping = META_MAPPING(sbi);
     struct page *page = NULL;
 repeat:
     page = grab_cache_page(mapping, index);
···
  */
 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 {
-    struct address_space *mapping = sbi->meta_inode->i_mapping;
+    struct address_space *mapping = META_MAPPING(sbi);
     struct page *page;
 repeat:
     page = grab_cache_page(mapping, index);
···
     if (PageUptodate(page))
         goto out;
 
-    if (f2fs_readpage(sbi, page, index, READ_SYNC))
+    if (f2fs_submit_page_bio(sbi, page, index,
+                READ_SYNC | REQ_META | REQ_PRIO))
         goto repeat;
 
     lock_page(page);
-    if (page->mapping != mapping) {
+    if (unlikely(page->mapping != mapping)) {
         f2fs_put_page(page, 1);
         goto repeat;
     }
···
     struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 
     /* Should not write any meta pages, if any IO error was occurred */
-    if (wbc->for_reclaim || sbi->por_doing ||
-            is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
-        dec_page_count(sbi, F2FS_DIRTY_META);
-        wbc->pages_skipped++;
-        set_page_dirty(page);
-        return AOP_WRITEPAGE_ACTIVATE;
-    }
+    if (unlikely(sbi->por_doing ||
+            is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+        goto redirty_out;
+
+    if (wbc->for_reclaim)
+        goto redirty_out;
 
     wait_on_page_writeback(page);
···
     dec_page_count(sbi, F2FS_DIRTY_META);
     unlock_page(page);
     return 0;
+
+redirty_out:
+    dec_page_count(sbi, F2FS_DIRTY_META);
+    wbc->pages_skipped++;
+    set_page_dirty(page);
+    return AOP_WRITEPAGE_ACTIVATE;
 }
 
 static int f2fs_write_meta_pages(struct address_space *mapping,
                 struct writeback_control *wbc)
 {
     struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
-    struct block_device *bdev = sbi->sb->s_bdev;
+    int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
     long written;
 
     if (wbc->for_kupdate)
         return 0;
 
-    if (get_pages(sbi, F2FS_DIRTY_META) == 0)
+    /* collect a number of dirty meta pages and write together */
+    if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
         return 0;
 
     /* if mounting is failed, skip writing node pages */
     mutex_lock(&sbi->cp_mutex);
-    written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+    written = sync_meta_pages(sbi, META, nrpages);
     mutex_unlock(&sbi->cp_mutex);
     wbc->nr_to_write -= written;
     return 0;
···
 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                 long nr_to_write)
 {
-    struct address_space *mapping = sbi->meta_inode->i_mapping;
+    struct address_space *mapping = META_MAPPING(sbi);
     pgoff_t index = 0, end = LONG_MAX;
     struct pagevec pvec;
     long nwritten = 0;
···
         nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                 PAGECACHE_TAG_DIRTY,
                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-        if (nr_pages == 0)
+        if (unlikely(nr_pages == 0))
             break;
 
         for (i = 0; i < nr_pages; i++) {
···
                 unlock_page(page);
                 break;
             }
-            if (nwritten++ >= nr_to_write)
+            nwritten++;
+            if (unlikely(nwritten >= nr_to_write))
                 break;
         }
         pagevec_release(&pvec);
···
     }
 
     if (nwritten)
-        f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
+        f2fs_submit_merged_bio(sbi, type, WRITE);
 
     return nwritten;
 }
···
 
 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 {
-    unsigned int max_orphans;
     int err = 0;
 
-    /*
-     * considering 512 blocks in a segment 5 blocks are needed for cp
-     * and log segment summaries. Remaining blocks are used to keep
-     * orphan entries with the limitation one reserved segment
-     * for cp pack we can have max 1020*507 orphan entries
-     */
-    max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
-    mutex_lock(&sbi->orphan_inode_mutex);
-    if (sbi->n_orphans >= max_orphans)
+    spin_lock(&sbi->orphan_inode_lock);
+    if (unlikely(sbi->n_orphans >= sbi->max_orphans))
         err = -ENOSPC;
     else
         sbi->n_orphans++;
-    mutex_unlock(&sbi->orphan_inode_mutex);
+    spin_unlock(&sbi->orphan_inode_lock);
+
     return err;
 }
 
 void release_orphan_inode(struct f2fs_sb_info *sbi)
 {
-    mutex_lock(&sbi->orphan_inode_mutex);
+    spin_lock(&sbi->orphan_inode_lock);
     f2fs_bug_on(sbi->n_orphans == 0);
     sbi->n_orphans--;
-    mutex_unlock(&sbi->orphan_inode_mutex);
+    spin_unlock(&sbi->orphan_inode_lock);
 }
 
 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
···
     struct list_head *head, *this;
     struct orphan_inode_entry *new = NULL, *orphan = NULL;
 
-    mutex_lock(&sbi->orphan_inode_mutex);
+    new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+    new->ino = ino;
+
+    spin_lock(&sbi->orphan_inode_lock);
     head = &sbi->orphan_inode_list;
     list_for_each(this, head) {
         orphan = list_entry(this, struct orphan_inode_entry, list);
-        if (orphan->ino == ino)
-            goto out;
+        if (orphan->ino == ino) {
+            spin_unlock(&sbi->orphan_inode_lock);
+            kmem_cache_free(orphan_entry_slab, new);
+            return;
+        }
+
         if (orphan->ino > ino)
             break;
         orphan = NULL;
     }
-
-    new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
-    new->ino = ino;
 
     /* add new_oentry into list which is sorted by inode number */
     if (orphan)
         list_add(&new->list, this->prev);
     else
         list_add_tail(&new->list, head);
-out:
-    mutex_unlock(&sbi->orphan_inode_mutex);
+    spin_unlock(&sbi->orphan_inode_lock);
 }
 
 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
···
     struct list_head *head;
     struct orphan_inode_entry *orphan;
 
-    mutex_lock(&sbi->orphan_inode_mutex);
+    spin_lock(&sbi->orphan_inode_lock);
     head = &sbi->orphan_inode_list;
     list_for_each_entry(orphan, head, list) {
         if (orphan->ino == ino) {
···
             break;
         }
     }
-    mutex_unlock(&sbi->orphan_inode_mutex);
+    spin_unlock(&sbi->orphan_inode_lock);
 }
 
 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
···
     iput(inode);
 }
 
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
     block_t start_blk, orphan_blkaddr, i, j;
 
     if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
-        return 0;
+        return;
 
     sbi->por_doing = true;
     start_blk = __start_cp_addr(sbi) + 1;
···
     /* clear Orphan Flag */
     clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
     sbi->por_doing = false;
-    return 0;
+    return;
 }
 
 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 {
-    struct list_head *head, *this, *next;
+    struct list_head *head;
     struct f2fs_orphan_block *orphan_blk = NULL;
-    struct page *page = NULL;
     unsigned int nentries = 0;
-    unsigned short index = 1;
-    unsigned short orphan_blocks;
-
-    orphan_blocks = (unsigned short)((sbi->n_orphans +
+    unsigned short index;
+    unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
         (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+    struct page *page = NULL;
+    struct orphan_inode_entry *orphan = NULL;
 
-    mutex_lock(&sbi->orphan_inode_mutex);
+    for (index = 0; index < orphan_blocks; index++)
+        grab_meta_page(sbi, start_blk + index);
+
+    index = 1;
+    spin_lock(&sbi->orphan_inode_lock);
     head = &sbi->orphan_inode_list;
 
     /* loop for each orphan inode entry and write them in Jornal block */
-    list_for_each_safe(this, next, head) {
-        struct orphan_inode_entry *orphan;
+    list_for_each_entry(orphan, head, list) {
+        if (!page) {
+            page = find_get_page(META_MAPPING(sbi), start_blk++);
+            f2fs_bug_on(!page);
+            orphan_blk =
+                (struct f2fs_orphan_block *)page_address(page);
+            memset(orphan_blk, 0, sizeof(*orphan_blk));
+            f2fs_put_page(page, 0);
+        }
 
-        orphan = list_entry(this, struct orphan_inode_entry, list);
+        orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
 
         if (nentries == F2FS_ORPHANS_PER_BLOCK) {
             /*
···
             set_page_dirty(page);
             f2fs_put_page(page, 1);
             index++;
-            start_blk++;
             nentries = 0;
             page = NULL;
         }
-        if (page)
-            goto page_exist;
-
-        page = grab_meta_page(sbi, start_blk);
-        orphan_blk = (struct f2fs_orphan_block *)page_address(page);
-        memset(orphan_blk, 0, sizeof(*orphan_blk));
-page_exist:
-        orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
     }
-    if (!page)
-        goto end;
 
-    orphan_blk->blk_addr = cpu_to_le16(index);
-    orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
-    orphan_blk->entry_count = cpu_to_le32(nentries);
-    set_page_dirty(page);
-    f2fs_put_page(page, 1);
-end:
-    mutex_unlock(&sbi->orphan_inode_mutex);
+    if (page) {
+        orphan_blk->blk_addr = cpu_to_le16(index);
+        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+        orphan_blk->entry_count = cpu_to_le32(nentries);
+        set_page_dirty(page);
+        f2fs_put_page(page, 1);
+    }
+
+    spin_unlock(&sbi->orphan_inode_lock);
 }
 
 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
···
     cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
 
     /* The second checkpoint pack should start at the next segment */
-    cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+    cp_start_blk_no += ((unsigned long long)1) <<
+                le32_to_cpu(fsb->log_blocks_per_seg);
     cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
 
     if (cp1 && cp2) {
···
     list_for_each(this, head) {
         struct dir_inode_entry *entry;
         entry = list_entry(this, struct dir_inode_entry, list);
-        if (entry->inode == inode)
+        if (unlikely(entry->inode == inode))
             return -EEXIST;
     }
     list_add_tail(&new->list, head);
···
 void remove_dirty_dir_inode(struct inode *inode)
 {
     struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-    struct list_head *head = &sbi->dir_inode_list;
-    struct list_head *this;
+
+    struct list_head *this, *head;
 
     if (!S_ISDIR(inode->i_mode))
         return;
···
         return;
     }
 
+    head = &sbi->dir_inode_list;
     list_for_each(this, head) {
         struct dir_inode_entry *entry;
         entry = list_entry(this, struct dir_inode_entry, list);
···
 
 struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-    struct list_head *head = &sbi->dir_inode_list;
-    struct list_head *this;
+
+    struct list_head *this, *head;
     struct inode *inode = NULL;
 
     spin_lock(&sbi->dir_inode_lock);
+
+    head = &sbi->dir_inode_list;
     list_for_each(this, head) {
         struct dir_inode_entry *entry;
         entry = list_entry(this, struct dir_inode_entry, list);
···
 
 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
 {
-    struct list_head *head = &sbi->dir_inode_list;
+    struct list_head *head;
     struct dir_inode_entry *entry;
     struct inode *inode;
 retry:
     spin_lock(&sbi->dir_inode_lock);
+
+    head = &sbi->dir_inode_list;
     if (list_empty(head)) {
         spin_unlock(&sbi->dir_inode_lock);
         return;
···
          * We should submit bio, since it exists several
          * wribacking dentry pages in the freeing inode.
          */
-        f2fs_submit_bio(sbi, DATA, true);
+        f2fs_submit_merged_bio(sbi, DATA, WRITE);
     }
     goto retry;
 }
···
     /* wait for previous submitted node/meta pages writeback */
     wait_on_all_pages_writeback(sbi);
 
-    filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
-    filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
+    filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
+    filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
 
     /* update user_block_counts */
     sbi->last_valid_block_count = sbi->total_valid_block_count;
···
     /* Here, we only have one bio having CP pack */
     sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
 
-    if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+    if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
         clear_prefree_segments(sbi);
         F2FS_RESET_SB_DIRT(sbi);
     }
···
 
     trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
 
-    f2fs_submit_bio(sbi, DATA, true);
-    f2fs_submit_bio(sbi, NODE, true);
-    f2fs_submit_bio(sbi, META, true);
+    f2fs_submit_merged_bio(sbi, DATA, WRITE);
+    f2fs_submit_merged_bio(sbi, NODE, WRITE);
+    f2fs_submit_merged_bio(sbi, META, WRITE);
 
     /*
      * update checkpoint pack index
···
 
 void init_orphan_info(struct f2fs_sb_info *sbi)
 {
-    mutex_init(&sbi->orphan_inode_mutex);
+    spin_lock_init(&sbi->orphan_inode_lock);
     INIT_LIST_HEAD(&sbi->orphan_inode_list);
     sbi->n_orphans = 0;
+    /*
+     * considering 512 blocks in a segment 8 blocks are needed for cp
+     * and log segment summaries. Remaining blocks are used to keep
+     * orphan entries with the limitation one reserved segment
+     * for cp pack we can have max 1020*504 orphan entries
+     */
+    sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
+                * F2FS_ORPHANS_PER_BLOCK;
 }
 
 int __init create_checkpoint_caches(void)
 {
     orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
             sizeof(struct orphan_inode_entry), NULL);
-    if (unlikely(!orphan_entry_slab))
+    if (!orphan_entry_slab)
         return -ENOMEM;
     inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
             sizeof(struct dir_inode_entry), NULL);
-    if (unlikely(!inode_entry_slab)) {
+    if (!inode_entry_slab) {
         kmem_cache_destroy(orphan_entry_slab);
         return -ENOMEM;
     }
+464 -161
fs/f2fs/data.c
··· 24 24 #include "segment.h" 25 25 #include <trace/events/f2fs.h> 26 26 27 + static void f2fs_read_end_io(struct bio *bio, int err) 28 + { 29 + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 30 + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 31 + 32 + do { 33 + struct page *page = bvec->bv_page; 34 + 35 + if (--bvec >= bio->bi_io_vec) 36 + prefetchw(&bvec->bv_page->flags); 37 + 38 + if (unlikely(!uptodate)) { 39 + ClearPageUptodate(page); 40 + SetPageError(page); 41 + } else { 42 + SetPageUptodate(page); 43 + } 44 + unlock_page(page); 45 + } while (bvec >= bio->bi_io_vec); 46 + 47 + bio_put(bio); 48 + } 49 + 50 + static void f2fs_write_end_io(struct bio *bio, int err) 51 + { 52 + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 53 + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 54 + struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb); 55 + 56 + do { 57 + struct page *page = bvec->bv_page; 58 + 59 + if (--bvec >= bio->bi_io_vec) 60 + prefetchw(&bvec->bv_page->flags); 61 + 62 + if (unlikely(!uptodate)) { 63 + SetPageError(page); 64 + set_bit(AS_EIO, &page->mapping->flags); 65 + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 66 + sbi->sb->s_flags |= MS_RDONLY; 67 + } 68 + end_page_writeback(page); 69 + dec_page_count(sbi, F2FS_WRITEBACK); 70 + } while (bvec >= bio->bi_io_vec); 71 + 72 + if (bio->bi_private) 73 + complete(bio->bi_private); 74 + 75 + if (!get_pages(sbi, F2FS_WRITEBACK) && 76 + !list_empty(&sbi->cp_wait.task_list)) 77 + wake_up(&sbi->cp_wait); 78 + 79 + bio_put(bio); 80 + } 81 + 82 + /* 83 + * Low-level block read/write IO operations. 
84 + */ 85 + static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, 86 + int npages, bool is_read) 87 + { 88 + struct bio *bio; 89 + 90 + /* No failure on bio allocation */ 91 + bio = bio_alloc(GFP_NOIO, npages); 92 + 93 + bio->bi_bdev = sbi->sb->s_bdev; 94 + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 95 + bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 96 + 97 + return bio; 98 + } 99 + 100 + static void __submit_merged_bio(struct f2fs_bio_info *io) 101 + { 102 + struct f2fs_io_info *fio = &io->fio; 103 + int rw; 104 + 105 + if (!io->bio) 106 + return; 107 + 108 + rw = fio->rw; 109 + 110 + if (is_read_io(rw)) { 111 + trace_f2fs_submit_read_bio(io->sbi->sb, rw, 112 + fio->type, io->bio); 113 + submit_bio(rw, io->bio); 114 + } else { 115 + trace_f2fs_submit_write_bio(io->sbi->sb, rw, 116 + fio->type, io->bio); 117 + /* 118 + * META_FLUSH is only from the checkpoint procedure, and we 119 + * should wait this metadata bio for FS consistency. 120 + */ 121 + if (fio->type == META_FLUSH) { 122 + DECLARE_COMPLETION_ONSTACK(wait); 123 + io->bio->bi_private = &wait; 124 + submit_bio(rw, io->bio); 125 + wait_for_completion(&wait); 126 + } else { 127 + submit_bio(rw, io->bio); 128 + } 129 + } 130 + 131 + io->bio = NULL; 132 + } 133 + 134 + void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, 135 + enum page_type type, int rw) 136 + { 137 + enum page_type btype = PAGE_TYPE_OF_BIO(type); 138 + struct f2fs_bio_info *io; 139 + 140 + io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; 141 + 142 + mutex_lock(&io->io_mutex); 143 + 144 + /* change META to META_FLUSH in the checkpoint procedure */ 145 + if (type >= META_FLUSH) { 146 + io->fio.type = META_FLUSH; 147 + io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 148 + } 149 + __submit_merged_bio(io); 150 + mutex_unlock(&io->io_mutex); 151 + } 152 + 153 + /* 154 + * Fill the locked page with data located in the block address. 155 + * Return unlocked page. 
156 + */ 157 + int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, 158 + block_t blk_addr, int rw) 159 + { 160 + struct bio *bio; 161 + 162 + trace_f2fs_submit_page_bio(page, blk_addr, rw); 163 + 164 + /* Allocate a new bio */ 165 + bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); 166 + 167 + if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { 168 + bio_put(bio); 169 + f2fs_put_page(page, 1); 170 + return -EFAULT; 171 + } 172 + 173 + submit_bio(rw, bio); 174 + return 0; 175 + } 176 + 177 + void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, 178 + block_t blk_addr, struct f2fs_io_info *fio) 179 + { 180 + enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 181 + struct f2fs_bio_info *io; 182 + bool is_read = is_read_io(fio->rw); 183 + 184 + io = is_read ? &sbi->read_io : &sbi->write_io[btype]; 185 + 186 + verify_block_addr(sbi, blk_addr); 187 + 188 + mutex_lock(&io->io_mutex); 189 + 190 + if (!is_read) 191 + inc_page_count(sbi, F2FS_WRITEBACK); 192 + 193 + if (io->bio && (io->last_block_in_bio != blk_addr - 1 || 194 + io->fio.rw != fio->rw)) 195 + __submit_merged_bio(io); 196 + alloc_new: 197 + if (io->bio == NULL) { 198 + int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 199 + 200 + io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); 201 + io->fio = *fio; 202 + } 203 + 204 + if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < 205 + PAGE_CACHE_SIZE) { 206 + __submit_merged_bio(io); 207 + goto alloc_new; 208 + } 209 + 210 + io->last_block_in_bio = blk_addr; 211 + 212 + mutex_unlock(&io->io_mutex); 213 + trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); 214 + } 215 + 27 216 /* 28 217 * Lock ordering for the change of data block address: 29 218 * ->data_page ··· 226 37 struct page *node_page = dn->node_page; 227 38 unsigned int ofs_in_node = dn->ofs_in_node; 228 39 229 - f2fs_wait_on_page_writeback(node_page, NODE, false); 40 + f2fs_wait_on_page_writeback(node_page, NODE); 230 41 231 
42 rn = F2FS_NODE(node_page); 232 43 ··· 240 51 { 241 52 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 242 53 243 - if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) 54 + if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 244 55 return -EPERM; 245 - if (!inc_valid_block_count(sbi, dn->inode, 1)) 56 + if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 246 57 return -ENOSPC; 247 58 248 59 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 249 60 250 61 __set_data_blkaddr(dn, NEW_ADDR); 251 62 dn->data_blkaddr = NEW_ADDR; 63 + mark_inode_dirty(dn->inode); 252 64 sync_inode_page(dn); 253 65 return 0; 66 + } 67 + 68 + int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) 69 + { 70 + bool need_put = dn->inode_page ? false : true; 71 + int err; 72 + 73 + /* if inode_page exists, index should be zero */ 74 + f2fs_bug_on(!need_put && index); 75 + 76 + err = get_dnode_of_data(dn, index, ALLOC_NODE); 77 + if (err) 78 + return err; 79 + 80 + if (dn->data_blkaddr == NULL_ADDR) 81 + err = reserve_new_block(dn); 82 + if (err || need_put) 83 + f2fs_put_dnode(dn); 84 + return err; 254 85 } 255 86 256 87 static int check_extent_cache(struct inode *inode, pgoff_t pgofs, ··· 279 70 struct f2fs_inode_info *fi = F2FS_I(inode); 280 71 pgoff_t start_fofs, end_fofs; 281 72 block_t start_blkaddr; 73 + 74 + if (is_inode_flag_set(fi, FI_NO_EXTENT)) 75 + return 0; 282 76 283 77 read_lock(&fi->ext.ext_lock); 284 78 if (fi->ext.len == 0) { ··· 321 109 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 322 110 pgoff_t fofs, start_fofs, end_fofs; 323 111 block_t start_blkaddr, end_blkaddr; 112 + int need_update = true; 324 113 325 114 f2fs_bug_on(blk_addr == NEW_ADDR); 326 115 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + ··· 329 116 330 117 /* Update the page address in the parent node */ 331 118 __set_data_blkaddr(dn, blk_addr); 119 + 120 + if (is_inode_flag_set(fi, FI_NO_EXTENT)) 121 + return; 332 122 333 123 
write_lock(&fi->ext.ext_lock); 334 124 ··· 379 163 fofs - start_fofs + 1; 380 164 fi->ext.len -= fofs - start_fofs + 1; 381 165 } 382 - goto end_update; 166 + } else { 167 + need_update = false; 383 168 } 384 - write_unlock(&fi->ext.ext_lock); 385 - return; 386 169 170 + /* Finally, if the extent is very fragmented, let's drop the cache. */ 171 + if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { 172 + fi->ext.len = 0; 173 + set_inode_flag(fi, FI_NO_EXTENT); 174 + need_update = true; 175 + } 387 176 end_update: 388 177 write_unlock(&fi->ext.ext_lock); 389 - sync_inode_page(dn); 178 + if (need_update) 179 + sync_inode_page(dn); 180 + return; 390 181 } 391 182 392 183 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) ··· 419 196 return ERR_PTR(-ENOENT); 420 197 421 198 /* By fallocate(), there is no cached page, but with NEW_ADDR */ 422 - if (dn.data_blkaddr == NEW_ADDR) 199 + if (unlikely(dn.data_blkaddr == NEW_ADDR)) 423 200 return ERR_PTR(-EINVAL); 424 201 425 202 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); ··· 431 208 return page; 432 209 } 433 210 434 - err = f2fs_readpage(sbi, page, dn.data_blkaddr, 211 + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 435 212 sync ? 
        READ_SYNC : READA);
213 +       if (err)
214 +           return ERR_PTR(err);
215 +
436 216     if (sync) {
437 217         wait_on_page_locked(page);
438 -           if (!PageUptodate(page)) {
218 +           if (unlikely(!PageUptodate(page))) {
439 219             f2fs_put_page(page, 0);
440 220             return ERR_PTR(-EIO);
441 221         }
···
472 246     }
473 247     f2fs_put_dnode(&dn);
474 248
475 -       if (dn.data_blkaddr == NULL_ADDR) {
249 +       if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
476 250         f2fs_put_page(page, 1);
477 251         return ERR_PTR(-ENOENT);
478 252     }
···
492 266         return page;
493 267     }
494 268
495 -       err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
269 +       err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
496 270     if (err)
497 271         return ERR_PTR(err);
498 272
499 273     lock_page(page);
500 -       if (!PageUptodate(page)) {
274 +       if (unlikely(!PageUptodate(page))) {
501 275         f2fs_put_page(page, 1);
502 276         return ERR_PTR(-EIO);
503 277     }
504 -       if (page->mapping != mapping) {
278 +       if (unlikely(page->mapping != mapping)) {
505 279         f2fs_put_page(page, 1);
506 280         goto repeat;
507 281     }
···
512 286  * Caller ensures that this data page is never allocated.
513 287  * A new zero-filled data page is allocated in the page cache.
514 288  *
515 -    * Also, caller should grab and release a mutex by calling mutex_lock_op() and
516 -    * mutex_unlock_op().
517 -    * Note that, npage is set only by make_empty_dir.
289 +    * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
290 +    * f2fs_unlock_op().
291 +    * Note that, ipage is set only by make_empty_dir.
518 292  */
519 293 struct page *get_new_data_page(struct inode *inode,
520 -           struct page *npage, pgoff_t index, bool new_i_size)
294 +           struct page *ipage, pgoff_t index, bool new_i_size)
521 295 {
522 296     struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
523 297     struct address_space *mapping = inode->i_mapping;
···
525 299     struct dnode_of_data dn;
526 300     int err;
527 301
528 -       set_new_dnode(&dn, inode, npage, npage, 0);
529 -       err = get_dnode_of_data(&dn, index, ALLOC_NODE);
302 +       set_new_dnode(&dn, inode, ipage, NULL, 0);
303 +       err = f2fs_reserve_block(&dn, index);
530 304     if (err)
531 305         return ERR_PTR(err);
532 -
533 -       if (dn.data_blkaddr == NULL_ADDR) {
534 -           if (reserve_new_block(&dn)) {
535 -               if (!npage)
536 -                   f2fs_put_dnode(&dn);
537 -               return ERR_PTR(-ENOSPC);
538 -           }
539 -       }
540 -       if (!npage)
541 -           f2fs_put_dnode(&dn);
542 306 repeat:
543 307     page = grab_cache_page(mapping, index);
544 -       if (!page)
545 -           return ERR_PTR(-ENOMEM);
308 +       if (!page) {
309 +           err = -ENOMEM;
310 +           goto put_err;
311 +       }
546 312
547 313     if (PageUptodate(page))
548 314         return page;
···
543 325         zero_user_segment(page, 0, PAGE_CACHE_SIZE);
544 326         SetPageUptodate(page);
545 327     } else {
546 -           err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
328 +           err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
329 +                   READ_SYNC);
547 330         if (err)
548 -               return ERR_PTR(err);
331 +               goto put_err;
332 +
549 333         lock_page(page);
550 -           if (!PageUptodate(page)) {
334 +           if (unlikely(!PageUptodate(page))) {
551 335             f2fs_put_page(page, 1);
552 -               return ERR_PTR(-EIO);
336 +               err = -EIO;
337 +               goto put_err;
553 338         }
554 -           if (page->mapping != mapping) {
339 +           if (unlikely(page->mapping != mapping)) {
555 340             f2fs_put_page(page, 1);
556 341             goto repeat;
557 342         }
···
565 344         i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
566 345         /* Only the directory inode sets new_i_size */
567 346         set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
568 -           mark_inode_dirty_sync(inode);
569 347     }
570 348     return page;
349 +
350 + put_err:
351 +     f2fs_put_dnode(&dn);
352 +     return ERR_PTR(err);
571 353 }
572 354
573 -   static void read_end_io(struct bio *bio, int err)
355 + static int __allocate_data_block(struct dnode_of_data *dn)
574 356 {
575 -       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
576 -       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
357 +     struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
358 +     struct f2fs_summary sum;
359 +     block_t new_blkaddr;
360 +     struct node_info ni;
361 +     int type;
577 362
578 -       do {
579 -           struct page *page = bvec->bv_page;
363 +     if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
364 +         return -EPERM;
365 +     if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
366 +         return -ENOSPC;
580 367
581 -           if (--bvec >= bio->bi_io_vec)
582 -               prefetchw(&bvec->bv_page->flags);
368 +     __set_data_blkaddr(dn, NEW_ADDR);
369 +     dn->data_blkaddr = NEW_ADDR;
583 370
584 -           if (uptodate) {
585 -               SetPageUptodate(page);
586 -           } else {
587 -               ClearPageUptodate(page);
588 -               SetPageError(page);
589 -           }
590 -           unlock_page(page);
591 -       } while (bvec >= bio->bi_io_vec);
592 -       bio_put(bio);
593 -   }
371 +     get_node_info(sbi, dn->nid, &ni);
372 +     set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
594 373
595 -   /*
596 -    * Fill the locked page with data located in the block address.
597 -    * Return unlocked page.
598 -    */
599 -   int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
600 -               block_t blk_addr, int type)
601 -   {
602 -       struct block_device *bdev = sbi->sb->s_bdev;
603 -       struct bio *bio;
374 +     type = CURSEG_WARM_DATA;
604 375
605 -       trace_f2fs_readpage(page, blk_addr, type);
376 +     allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
606 377
607 -       down_read(&sbi->bio_sem);
378 +     /* direct IO doesn't use extent cache to maximize the performance */
379 +     set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
380 +     update_extent_cache(new_blkaddr, dn);
381 +     clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
608 382
609 -       /* Allocate a new bio */
610 -       bio = f2fs_bio_alloc(bdev, 1);
611 -
612 -       /* Initialize the bio */
613 -       bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
614 -       bio->bi_end_io = read_end_io;
615 -
616 -       if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
617 -           bio_put(bio);
618 -           up_read(&sbi->bio_sem);
619 -           f2fs_put_page(page, 1);
620 -           return -EFAULT;
621 -       }
622 -
623 -       submit_bio(type, bio);
624 -       up_read(&sbi->bio_sem);
383 +     dn->data_blkaddr = new_blkaddr;
625 384     return 0;
626 385 }
627 386
628 387 /*
629 -    * This function should be used by the data read flow only where it
630 -    * does not check the "create" flag that indicates block allocation.
631 -    * The reason for this special functionality is to exploit VFS readahead
632 -    * mechanism.
388 +  * get_data_block() now supports readahead/bmap/rw direct_IO with mapped bh.
389 +  * If original data blocks are allocated, then give them to blockdev.
390 +  * Otherwise,
391 +  *     a. preallocate requested block addresses
392 +  *     b. do not use extent cache for better performance
393 +  *     c. give the block addresses to blockdev
633 394  */
634 -   static int get_data_block_ro(struct inode *inode, sector_t iblock,
395 + static int get_data_block(struct inode *inode, sector_t iblock,
635 396             struct buffer_head *bh_result, int create)
636 397 {
398 +     struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
637 399     unsigned int blkbits = inode->i_sb->s_blocksize_bits;
638 400     unsigned maxblocks = bh_result->b_size >> blkbits;
639 401     struct dnode_of_data dn;
640 -       pgoff_t pgofs;
641 -       int err;
402 +     int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
403 +     pgoff_t pgofs, end_offset;
404 +     int err = 0, ofs = 1;
405 +     bool allocated = false;
642 406
643 407     /* Get the page offset from the block offset(iblock) */
644 408     pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
645 409
646 -       if (check_extent_cache(inode, pgofs, bh_result)) {
647 -           trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
648 -           return 0;
649 -       }
410 +     if (check_extent_cache(inode, pgofs, bh_result))
411 +         goto out;
412 +
413 +     if (create)
414 +         f2fs_lock_op(sbi);
650 415
651 416     /* When reading holes, we need its node page */
652 417     set_new_dnode(&dn, inode, NULL, NULL, 0);
653 -       err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
418 +     err = get_dnode_of_data(&dn, pgofs, mode);
654 419     if (err) {
655 -           trace_f2fs_get_data_block(inode, iblock, bh_result, err);
656 -           return (err == -ENOENT) ? 0 : err;
420 +         if (err == -ENOENT)
421 +             err = 0;
422 +         goto unlock_out;
423 +     }
424 +     if (dn.data_blkaddr == NEW_ADDR)
425 +         goto put_out;
426 +
427 +     if (dn.data_blkaddr != NULL_ADDR) {
428 +         map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
429 +     } else if (create) {
430 +         err = __allocate_data_block(&dn);
431 +         if (err)
432 +             goto put_out;
433 +         allocated = true;
434 +         map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
435 +     } else {
436 +         goto put_out;
657 437     }
658 438
659 -       /* It does not support data allocation */
660 -       f2fs_bug_on(create);
439 +     end_offset = IS_INODE(dn.node_page) ?
440 +             ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
441 +     bh_result->b_size = (((size_t)1) << blkbits);
442 +     dn.ofs_in_node++;
443 +     pgofs++;
661 444
662 -       if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
663 -           int i;
664 -           unsigned int end_offset;
445 + get_next:
446 +     if (dn.ofs_in_node >= end_offset) {
447 +         if (allocated)
448 +             sync_inode_page(&dn);
449 +         allocated = false;
450 +         f2fs_put_dnode(&dn);
451 +
452 +         set_new_dnode(&dn, inode, NULL, NULL, 0);
453 +         err = get_dnode_of_data(&dn, pgofs, mode);
454 +         if (err) {
455 +             if (err == -ENOENT)
456 +                 err = 0;
457 +             goto unlock_out;
458 +         }
459 +         if (dn.data_blkaddr == NEW_ADDR)
460 +             goto put_out;
665 461
666 462         end_offset = IS_INODE(dn.node_page) ?
667 -               ADDRS_PER_INODE(F2FS_I(inode)) :
668 -               ADDRS_PER_BLOCK;
669 -
670 -           clear_buffer_new(bh_result);
671 -
672 -           /* Give more consecutive addresses for the read ahead */
673 -           for (i = 0; i < end_offset - dn.ofs_in_node; i++)
674 -               if (((datablock_addr(dn.node_page,
675 -                           dn.ofs_in_node + i))
676 -                   != (dn.data_blkaddr + i)) || maxblocks == i)
677 -                   break;
678 -           map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
679 -           bh_result->b_size = (i << blkbits);
463 +             ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
680 464     }
465 +
466 +     if (maxblocks > (bh_result->b_size >> blkbits)) {
467 +         block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
468 +         if (blkaddr == NULL_ADDR && create) {
469 +             err = __allocate_data_block(&dn);
470 +             if (err)
471 +                 goto sync_out;
472 +             allocated = true;
473 +             blkaddr = dn.data_blkaddr;
474 +         }
475 +         /* Give more consecutive addresses for the read ahead */
476 +         if (blkaddr == (bh_result->b_blocknr + ofs)) {
477 +             ofs++;
478 +             dn.ofs_in_node++;
479 +             pgofs++;
480 +             bh_result->b_size += (((size_t)1) << blkbits);
481 +             goto get_next;
482 +         }
483 +     }
484 + sync_out:
485 +     if (allocated)
486 +         sync_inode_page(&dn);
487 + put_out:
681 488     f2fs_put_dnode(&dn);
682 -       trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
683 -       return 0;
489 + unlock_out:
490 +     if (create)
491 +         f2fs_unlock_op(sbi);
492 + out:
493 +     trace_f2fs_get_data_block(inode, iblock, bh_result, err);
494 +     return err;
684 495 }
685 496
686 497 static int f2fs_read_data_page(struct file *file, struct page *page)
687 498 {
688 -       return mpage_readpage(page, get_data_block_ro);
499 +     struct inode *inode = page->mapping->host;
500 +     int ret;
501 +
502 +     /* If the file has inline data, try to read it directly */
503 +     if (f2fs_has_inline_data(inode))
504 +         ret = f2fs_read_inline_data(inode, page);
505 +     else
506 +         ret = mpage_readpage(page, get_data_block);
507 +
508 +     return ret;
689 509 }
690 510
691 511 static int f2fs_read_data_pages(struct file *file,
692 512             struct address_space *mapping,
693 513             struct list_head *pages, unsigned nr_pages)
694 514 {
695 -       return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
515 +     struct inode *inode = file->f_mapping->host;
516 +
517 +     /* If the file has inline data, skip readpages */
518 +     if (f2fs_has_inline_data(inode))
519 +         return 0;
520 +
521 +     return mpage_readpages(mapping, pages, nr_pages, get_data_block);
696 522 }
697 523
698 -   int do_write_data_page(struct page *page)
524 + int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
699 525 {
700 526     struct inode *inode = page->mapping->host;
701 -       block_t old_blk_addr, new_blk_addr;
527 +     block_t old_blkaddr, new_blkaddr;
702 528     struct dnode_of_data dn;
703 529     int err = 0;
704 530
···
754 486     if (err)
755 487         return err;
756 488
757 -       old_blk_addr = dn.data_blkaddr;
489 +     old_blkaddr = dn.data_blkaddr;
758 490
759 491     /* This page is already truncated */
760 -       if (old_blk_addr == NULL_ADDR)
492 +     if (old_blkaddr == NULL_ADDR)
761 493         goto out_writepage;
762 494
763 495     set_page_writeback(page);
···
766 498      * If current allocation needs SSR,
767 499      * it had better in-place writes for updated data.
768 500      */
769 -       if (unlikely(old_blk_addr != NEW_ADDR &&
501 +     if (unlikely(old_blkaddr != NEW_ADDR &&
770 502             !is_cold_data(page) &&
771 503             need_inplace_update(inode))) {
772 -           rewrite_data_page(F2FS_SB(inode->i_sb), page,
773 -                   old_blk_addr);
504 +         rewrite_data_page(page, old_blkaddr, fio);
774 505     } else {
775 -           write_data_page(inode, page, &dn,
776 -                   old_blk_addr, &new_blk_addr);
777 -           update_extent_cache(new_blk_addr, &dn);
506 +         write_data_page(page, &dn, &new_blkaddr, fio);
507 +         update_extent_cache(new_blkaddr, &dn);
778 508     }
779 509 out_writepage:
780 510     f2fs_put_dnode(&dn);
···
787 521     loff_t i_size = i_size_read(inode);
788 522     const pgoff_t end_index = ((unsigned long long) i_size)
789 523             >> PAGE_CACHE_SHIFT;
790 -       unsigned offset;
524 +     unsigned offset = 0;
791 525     bool need_balance_fs = false;
792 526     int err = 0;
527 +     struct f2fs_io_info fio = {
528 +         .type = DATA,
529 +         .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
530 +     };
793 531
794 532     if (page->index < end_index)
795 533         goto write;
···
813 543
814 544     zero_user_segment(page, offset, PAGE_CACHE_SIZE);
815 545 write:
816 -       if (sbi->por_doing) {
546 +     if (unlikely(sbi->por_doing)) {
817 547         err = AOP_WRITEPAGE_ACTIVATE;
818 548         goto redirty_out;
819 549     }
···
822 552     if (S_ISDIR(inode->i_mode)) {
823 553         dec_page_count(sbi, F2FS_DIRTY_DENTS);
824 554         inode_dec_dirty_dents(inode);
825 -           err = do_write_data_page(page);
555 +         err = do_write_data_page(page, &fio);
826 556     } else {
827 557         f2fs_lock_op(sbi);
828 -           err = do_write_data_page(page);
558 +
559 +         if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
560 +             err = f2fs_write_inline_data(inode, page, offset);
561 +             f2fs_unlock_op(sbi);
562 +             goto out;
563 +         } else {
564 +             err = do_write_data_page(page, &fio);
565 +         }
566 +
829 567         f2fs_unlock_op(sbi);
830 568         need_balance_fs = true;
831 569     }
···
842 564     else if (err)
843 565         goto redirty_out;
844 566
845 -       if (wbc->for_reclaim)
846 -           f2fs_submit_bio(sbi, DATA, true);
567 +     if (wbc->for_reclaim) {
568 +         f2fs_submit_merged_bio(sbi, DATA, WRITE);
569 +         need_balance_fs = false;
570 +     }
847 571
848 572     clear_cold_data(page);
849 573 out:
···
897 617     ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
898 618     if (locked)
899 619         mutex_unlock(&sbi->writepages);
900 -       f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
620 +
621 +     f2fs_submit_merged_bio(sbi, DATA, WRITE);
901 622
902 623     remove_dirty_dir_inode(inode);
903 624
···
919 638
920 639     f2fs_balance_fs(sbi);
921 640 repeat:
641 +     err = f2fs_convert_inline_data(inode, pos + len);
642 +     if (err)
643 +         return err;
644 +
922 645     page = grab_cache_page_write_begin(mapping, index, flags);
923 646     if (!page)
924 647         return -ENOMEM;
925 648     *pagep = page;
926 649
650 +     if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
651 +         goto inline_data;
652 +
927 653     f2fs_lock_op(sbi);
928 -
929 654     set_new_dnode(&dn, inode, NULL, NULL, 0);
930 -       err = get_dnode_of_data(&dn, index, ALLOC_NODE);
931 -       if (err)
932 -           goto err;
933 -
934 -       if (dn.data_blkaddr == NULL_ADDR)
935 -           err = reserve_new_block(&dn);
936 -
937 -       f2fs_put_dnode(&dn);
938 -       if (err)
939 -           goto err;
940 -
655 +     err = f2fs_reserve_block(&dn, index);
941 656     f2fs_unlock_op(sbi);
942 657
658 +     if (err) {
659 +         f2fs_put_page(page, 1);
660 +         return err;
661 +     }
662 + inline_data:
943 663     if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
944 664         return 0;
945 665
···
956 674     if (dn.data_blkaddr == NEW_ADDR) {
957 675         zero_user_segment(page, 0, PAGE_CACHE_SIZE);
958 676     } else {
959 -           err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
677 +         if (f2fs_has_inline_data(inode))
678 +             err = f2fs_read_inline_data(inode, page);
679 +         else
680 +             err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
681 +                     READ_SYNC);
960 682         if (err)
961 683             return err;
962 684         lock_page(page);
963 -           if (!PageUptodate(page)) {
685 +         if (unlikely(!PageUptodate(page))) {
964 686             f2fs_put_page(page, 1);
965 687             return -EIO;
966 688         }
967 -           if (page->mapping != mapping) {
689 +         if (unlikely(page->mapping != mapping)) {
968 690             f2fs_put_page(page, 1);
969 691             goto repeat;
970 692         }
···
977 691     SetPageUptodate(page);
978 692     clear_cold_data(page);
979 693     return 0;
980 -
981 -   err:
982 -       f2fs_unlock_op(sbi);
983 -       f2fs_put_page(page, 1);
984 -       return err;
985 694 }
986 695
987 696 static int f2fs_write_end(struct file *file,
···
995 714         update_inode_page(inode);
996 715     }
997 716
998 -       unlock_page(page);
999 -       page_cache_release(page);
717 +     f2fs_put_page(page, 1);
1000 718     return copied;
719 + }
720 +
721 + static int check_direct_IO(struct inode *inode, int rw,
722 +             const struct iovec *iov, loff_t offset, unsigned long nr_segs)
723 + {
724 +     unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
725 +     int i;
726 +
727 +     if (rw == READ)
728 +         return 0;
729 +
730 +     if (offset & blocksize_mask)
731 +         return -EINVAL;
732 +
733 +     for (i = 0; i < nr_segs; i++)
734 +         if (iov[i].iov_len & blocksize_mask)
735 +             return -EINVAL;
736 +     return 0;
1001 737 }
1002 738
1003 739 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
···
1023 725     struct file *file = iocb->ki_filp;
1024 726     struct inode *inode = file->f_mapping->host;
1025 727
1026 -       if (rw == WRITE)
728 +     /* Let buffer I/O handle the inline data case. */
729 +     if (f2fs_has_inline_data(inode))
1027 730         return 0;
1028 731
1029 -       /* Needs synchronization with the cleaner */
732 +     if (check_direct_IO(inode, rw, iov, offset, nr_segs))
733 +         return 0;
734 +
1030 735     return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
1031 -           get_data_block_ro);
736 +             get_data_block);
1032 737 }
1033 738
1034 739 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
···
1060 759     trace_f2fs_set_page_dirty(page, DATA);
1061 760
1062 761     SetPageUptodate(page);
762 +     mark_inode_dirty(inode);
763 +
1063 764     if (!PageDirty(page)) {
1064 765         __set_page_dirty_nobuffers(page);
1065 766         set_dirty_dir_page(inode, page);
···
1072 769
1073 770 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1074 771 {
1075 -       return generic_block_bmap(mapping, block, get_data_block_ro);
772 +     return generic_block_bmap(mapping, block, get_data_block);
1076 773 }
1077 774
1078 775 const struct address_space_operations f2fs_dblock_aops = {
fs/f2fs/debug.c (+37 -16)
···
24 24 #include "gc.h"
25 25
26 26 static LIST_HEAD(f2fs_stat_list);
27 -   static struct dentry *debugfs_root;
27 + static struct dentry *f2fs_debugfs_root;
28 28 static DEFINE_MUTEX(f2fs_stat_mutex);
29 29
30 30 static void update_general_status(struct f2fs_sb_info *sbi)
···
45 45     si->valid_count = valid_user_blocks(sbi);
46 46     si->valid_node_count = valid_node_count(sbi);
47 47     si->valid_inode_count = valid_inode_count(sbi);
48 +      si->inline_inode = sbi->inline_inode;
48 49     si->utilization = utilization(sbi);
49 50
50 51     si->free_segs = free_segments(sbi);
51 52     si->free_secs = free_sections(sbi);
52 53     si->prefree_count = prefree_segments(sbi);
53 54     si->dirty_count = dirty_segments(sbi);
54 -       si->node_pages = sbi->node_inode->i_mapping->nrpages;
55 -       si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
55 +      si->node_pages = NODE_MAPPING(sbi)->nrpages;
56 +      si->meta_pages = META_MAPPING(sbi)->nrpages;
56 57     si->nats = NM_I(sbi)->nat_cnt;
57 58     si->sits = SIT_I(sbi)->dirty_sentries;
58 59     si->fnids = NM_I(sbi)->fcnt;
···
166 165     /* free nids */
167 166     si->cache_mem = NM_I(sbi)->fcnt;
168 167     si->cache_mem += NM_I(sbi)->nat_cnt;
169 -       npages = sbi->node_inode->i_mapping->nrpages;
168 +     npages = NODE_MAPPING(sbi)->nrpages;
170 169     si->cache_mem += npages << PAGE_CACHE_SHIFT;
171 -       npages = sbi->meta_inode->i_mapping->nrpages;
170 +     npages = META_MAPPING(sbi)->nrpages;
172 171     si->cache_mem += npages << PAGE_CACHE_SHIFT;
173 172     si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
174 173     si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
···
201 200     seq_printf(s, "Other: %u)\n - Data: %u\n",
202 201             si->valid_node_count - si->valid_inode_count,
203 202             si->valid_count - si->valid_node_count);
203 +     seq_printf(s, " - Inline_data Inode: %u\n",
204 +             si->inline_inode);
204 205     seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
205 206             si->main_area_segs, si->main_area_sections,
206 207             si->main_area_zones);
···
245 242     seq_printf(s, "  - node blocks : %d\n", si->node_blks);
246 243     seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
247 244             si->hit_ext, si->total_ext);
248 -       seq_printf(s, "\nBalancing F2FS Async:\n");
249 -       seq_printf(s, "  - nodes %4d in %4d\n",
245 +     seq_puts(s, "\nBalancing F2FS Async:\n");
246 +     seq_printf(s, "  - nodes: %4d in %4d\n",
250 247             si->ndirty_node, si->node_pages);
251 -       seq_printf(s, "  - dents %4d in dirs:%4d\n",
248 +     seq_printf(s, "  - dents: %4d in dirs:%4d\n",
252 249             si->ndirty_dent, si->ndirty_dirs);
253 -       seq_printf(s, "  - meta %4d in %4d\n",
250 +     seq_printf(s, "  - meta: %4d in %4d\n",
254 251             si->ndirty_meta, si->meta_pages);
255 -       seq_printf(s, "  - NATs %5d > %lu\n",
252 +     seq_printf(s, "  - NATs: %5d > %lu\n",
256 253             si->nats, NM_WOUT_THRESHOLD);
257 254     seq_printf(s, "  - SITs: %5d\n  - free_nids: %5d\n",
258 255             si->sits, si->fnids);
···
343 340
344 341 void __init f2fs_create_root_stats(void)
345 342 {
346 -       debugfs_root = debugfs_create_dir("f2fs", NULL);
347 -       if (debugfs_root)
348 -           debugfs_create_file("status", S_IRUGO, debugfs_root,
349 -               NULL, &stat_fops);
343 +     struct dentry *file;
344 +
345 +     f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
346 +     if (!f2fs_debugfs_root)
347 +         goto bail;
348 +
349 +     file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
350 +             NULL, &stat_fops);
351 +     if (!file)
352 +         goto free_debugfs_dir;
353 +
354 +     return;
355 +
356 + free_debugfs_dir:
357 +     debugfs_remove(f2fs_debugfs_root);
358 +
359 + bail:
360 +     f2fs_debugfs_root = NULL;
361 +     return;
350 362 }
351 363
352 364 void f2fs_destroy_root_stats(void)
353 365 {
354 -       debugfs_remove_recursive(debugfs_root);
355 -       debugfs_root = NULL;
366 +     if (!f2fs_debugfs_root)
367 +         return;
368 +
369 +     debugfs_remove_recursive(f2fs_debugfs_root);
370 +     f2fs_debugfs_root = NULL;
356 371 }
fs/f2fs/dir.c (+20 -27)
···
190 190     unsigned int max_depth;
191 191     unsigned int level;
192 192
193 -       if (namelen > F2FS_NAME_LEN)
194 -           return NULL;
195 -
196 193     if (npages == 0)
197 194         return NULL;
198 195
···
256 259     dir->i_mtime = dir->i_ctime = CURRENT_TIME;
257 260     mark_inode_dirty(dir);
258 261
259 -       /* update parent inode number before releasing dentry page */
260 -       F2FS_I(inode)->i_pino = dir->i_ino;
261 -
262 262     f2fs_put_page(page, 1);
263 263 }
264 264
265 265 static void init_dent_inode(const struct qstr *name, struct page *ipage)
266 266 {
267 -       struct f2fs_node *rn;
267 +     struct f2fs_inode *ri;
268 268
269 269     /* copy name info. to this inode page */
270 -       rn = F2FS_NODE(ipage);
271 -       rn->i.i_namelen = cpu_to_le32(name->len);
272 -       memcpy(rn->i.i_name, name->name, name->len);
270 +     ri = F2FS_INODE(ipage);
271 +     ri->i_namelen = cpu_to_le32(name->len);
272 +     memcpy(ri->i_name, name->name, name->len);
273 273     set_page_dirty(ipage);
274 274 }
275 275
···
342 348
343 349         err = f2fs_init_acl(inode, dir, page);
344 350         if (err)
345 -               goto error;
351 +             goto put_error;
346 352
347 353         err = f2fs_init_security(inode, dir, name, page);
348 354         if (err)
349 -               goto error;
355 +             goto put_error;
350 356
351 357         wait_on_page_writeback(page);
352 358     } else {
···
370 376     }
371 377     return page;
372 378
373 -   error:
379 + put_error:
374 380     f2fs_put_page(page, 1);
381 + error:
375 382     remove_inode_page(inode);
376 383     return ERR_PTR(err);
377 384 }
···
388 393         clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
389 394     }
390 395     dir->i_mtime = dir->i_ctime = CURRENT_TIME;
396 +     mark_inode_dirty(dir);
397 +
391 398     if (F2FS_I(dir)->i_current_depth != current_depth) {
392 399         F2FS_I(dir)->i_current_depth = current_depth;
393 400         set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
···
397 400
398 401     if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
399 402         update_inode_page(dir);
400 -       else
401 -           mark_inode_dirty(dir);
402 403
403 404     if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
404 405         clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
···
427 432 }
428 433
429 434 /*
430 -    * Caller should grab and release a mutex by calling mutex_lock_op() and
431 -    * mutex_unlock_op().
435 +  * Caller should grab and release a rwsem by calling f2fs_lock_op() and
436 +  * f2fs_unlock_op().
432 437  */
433 -   int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
438 + int __f2fs_add_link(struct inode *dir, const struct qstr *name,
439 +             struct inode *inode)
434 440 {
435 441     unsigned int bit_pos;
436 442     unsigned int level;
···
457 461     }
458 462
459 463 start:
460 -       if (current_depth == MAX_DIR_HASH_DEPTH)
464 +     if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
461 465         return -ENOSPC;
462 466
463 467     /* Increase the depth, if required */
···
550 554
551 555     dir->i_ctime = dir->i_mtime = CURRENT_TIME;
552 556
553 -       if (inode && S_ISDIR(inode->i_mode)) {
554 -           drop_nlink(dir);
555 -           update_inode_page(dir);
556 -       } else {
557 -           mark_inode_dirty(dir);
558 -       }
559 -
560 557     if (inode) {
558 +         if (S_ISDIR(inode->i_mode)) {
559 +             drop_nlink(dir);
560 +             update_inode_page(dir);
561 +         }
561 562         inode->i_ctime = CURRENT_TIME;
562 563         drop_nlink(inode);
563 564         if (S_ISDIR(inode->i_mode)) {
···
629 636
630 637     bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
631 638
632 -       for ( ; n < npages; n++) {
639 +     for (; n < npages; n++) {
633 640         dentry_page = get_lock_data_page(inode, n);
634 641         if (IS_ERR(dentry_page))
635 642             continue;
fs/f2fs/f2fs.h (+147 -48)
···
22 22
23 23 #ifdef CONFIG_F2FS_CHECK_FS
24 24 #define f2fs_bug_on(condition) BUG_ON(condition)
25 +  #define f2fs_down_write(x, y)  down_write_nest_lock(x, y)
25 26 #else
26 27 #define f2fs_bug_on(condition)
28 +  #define f2fs_down_write(x, y)  down_write(x)
27 29 #endif
28 30
29 31 /*
···
39 37 #define F2FS_MOUNT_POSIX_ACL        0x00000020
40 38 #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
41 39 #define F2FS_MOUNT_INLINE_XATTR     0x00000080
40 +  #define F2FS_MOUNT_INLINE_DATA      0x00000100
42 41
43 42 #define clear_opt(sbi, option)  (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
44 43 #define set_opt(sbi, option)    (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
···
100 97     struct inode *inode;    /* vfs inode pointer */
101 98 };
102 99
100 + /* for the list of blockaddresses to be discarded */
101 + struct discard_entry {
102 +     struct list_head list;  /* list head */
103 +     block_t blkaddr;    /* block address to be discarded */
104 +     int len;        /* # of consecutive blocks of the discard */
105 + };
106 +
103 107 /* for the list of fsync inodes, used only during recovery */
104 108 struct fsync_inode_entry {
105 109     struct list_head list;  /* list head */
···
165 155     LOOKUP_NODE,        /* look up a node without readahead */
166 156     LOOKUP_NODE_RA,     /*
167 157                  * look up a node with readahead called
168 -                    * by get_datablock_ro.
158 +                  * by get_data_block.
169 159                  */
170 160 };
171 161
172 162 #define F2FS_LINK_MAX       32000   /* maximum link count per file */
173 163
174 164 /* for in-memory extent cache entry */
165 + #define F2FS_MIN_EXTENT_LEN 16  /* minimum extent length */
166 +
175 167 struct extent_info {
176 168     rwlock_t ext_lock;  /* rwlock for consistency */
177 169     unsigned int fofs;  /* start offset in a file */
···
320 308
321 309     /* a threshold to reclaim prefree segments */
322 310     unsigned int rec_prefree_segments;
311 +
312 +     /* for small discard management */
313 +     struct list_head discard_list;  /* 4KB discard list */
314 +     int nr_discards;        /* # of discards in the list */
315 +     int max_discards;       /* max. discards to be issued */
316 +
317 +     unsigned int ipu_policy;    /* in-place-update policy */
318 +     unsigned int min_ipu_util;  /* in-place-update threshold */
323 319 };
324 320
325 321 /*
···
358 338  *          with waiting the bio's completion
359 339  * ...          Only can be used with META.
360 340  */
341 + #define PAGE_TYPE_OF_BIO(type)  ((type) > META ? META : (type))
361 342 enum page_type {
362 343     DATA,
363 344     NODE,
364 345     META,
365 346     NR_PAGE_TYPE,
366 347     META_FLUSH,
348 + };
349 +
350 + struct f2fs_io_info {
351 +     enum page_type type;    /* contains DATA/NODE/META/META_FLUSH */
352 +     int rw;         /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
353 + };
354 +
355 + #define is_read_io(rw)  (((rw) & 1) == READ)
356 + struct f2fs_bio_info {
357 +     struct f2fs_sb_info *sbi;   /* f2fs superblock */
358 +     struct bio *bio;        /* bios to merge */
359 +     sector_t last_block_in_bio; /* last block number */
360 +     struct f2fs_io_info fio;    /* store buffered io info. */
361 +     struct mutex io_mutex;      /* mutex for bio */
367 362 };
368 363
369 364 struct f2fs_sb_info {
···
394 359
395 360     /* for segment-related operations */
396 361     struct f2fs_sm_info *sm_info;       /* segment manager */
397 -       struct bio *bio[NR_PAGE_TYPE];      /* bios to merge */
398 -       sector_t last_block_in_bio[NR_PAGE_TYPE];   /* last block number */
399 -       struct rw_semaphore bio_sem;        /* IO semaphore */
362 +
363 +     /* for bio operations */
364 +     struct f2fs_bio_info read_io;           /* for read bios */
365 +     struct f2fs_bio_info write_io[NR_PAGE_TYPE];    /* for write bios */
400 366
401 367     /* for checkpoint */
402 368     struct f2fs_checkpoint *ckpt;       /* raw checkpoint pointer */
···
412 376
413 377     /* for orphan inode management */
414 378     struct list_head orphan_inode_list; /* orphan inode list */
415 -       struct mutex orphan_inode_mutex;    /* for orphan inode list */
379 +     spinlock_t orphan_inode_lock;       /* for orphan inode list */
416 380     unsigned int n_orphans;         /* # of orphan inodes */
381 +     unsigned int max_orphans;       /* max orphan inodes */
417 382
418 383     /* for directory inode management */
419 384     struct list_head dir_inode_list;    /* dir inode list */
···
451 414     struct f2fs_gc_kthread  *gc_thread; /* GC thread */
452 415     unsigned int cur_victim_sec;        /* current victim section num */
453 416
417 +     /* maximum # of trials to find a victim segment for SSR and GC */
418 +     unsigned int max_victim_search;
419 +
454 420     /*
455 421      * for stat information.
456 422      * one is for the LFS mode, and the other is for the SSR mode.
···
463 423     unsigned int segment_count[2];  /* # of allocated segments */
464 424     unsigned int block_count[2];    /* # of allocated blocks */
465 425     int total_hit_ext, read_hit_ext;    /* extent cache hit ratio */
426 +     int inline_inode;       /* # of inline_data inodes */
466 427     int bg_gc;          /* background gc calls */
467 428     unsigned int n_dirty_dirs;  /* # of dir inodes */
468 429 #endif
···
503 462     return (struct f2fs_node *)page_address(page);
504 463 }
505 464
465 + static inline struct f2fs_inode *F2FS_INODE(struct page *page)
466 + {
467 +     return &((struct f2fs_node *)page_address(page))->i;
468 + }
469 +
506 470 static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
507 471 {
508 472     return (struct f2fs_nm_info *)(sbi->nm_info);
···
531 485 static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
532 486 {
533 487     return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
488 + }
489 +
490 + static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi)
491 + {
492 +     return sbi->meta_inode->i_mapping;
493 + }
494 +
495 + static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
496 + {
497 +     return sbi->node_inode->i_mapping;
534 498 }
535 499
536 500 static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
···
590 534
591 535 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
592 536 {
593 -       down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex);
537 +     f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
594 538 }
595 539
596 540 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
···
604 548 static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
605 549 {
606 550     WARN_ON((nid >= NM_I(sbi)->max_nid));
607 -       if (nid >= NM_I(sbi)->max_nid)
551 +     if (unlikely(nid >= NM_I(sbi)->max_nid))
608 552         return -EINVAL;
609 553     return 0;
610 554 }
···
617 561 static inline int F2FS_HAS_BLOCKS(struct inode *inode)
618 562 {
619 563     if (F2FS_I(inode)->i_xattr_nid)
620 -           return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
564 +         return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
621 565     else
622 -           return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
566 +         return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
623 567 }
624 568
625 569 static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
···
630 574     spin_lock(&sbi->stat_lock);
631 575     valid_block_count =
632 576         sbi->total_valid_block_count + (block_t)count;
633 -       if (valid_block_count > sbi->user_block_count) {
577 +     if (unlikely(valid_block_count > sbi->user_block_count)) {
634 578         spin_unlock(&sbi->stat_lock);
635 579         return false;
636 580     }
···
641 585     return true;
642 586 }
643 587
644 -   static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
588 + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
645 589                         struct inode *inode,
646 590                         blkcnt_t count)
647 591 {
···
651 595     inode->i_blocks -= count;
652 596     sbi->total_valid_block_count -= (block_t)count;
653 597     spin_unlock(&sbi->stat_lock);
654 -       return 0;
655 598 }
656 599
657 600 static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
···
741 686 }
742 687
743 688 static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
744 -                       struct inode *inode,
745 -                       unsigned int count)
689 +                     struct inode *inode)
746 690 {
747 691     block_t valid_block_count;
748 692     unsigned int valid_node_count;
749 693
750 694     spin_lock(&sbi->stat_lock);
751 695
752 -       valid_block_count = sbi->total_valid_block_count + (block_t)count;
753 -       sbi->alloc_valid_block_count += (block_t)count;
754 -       valid_node_count = sbi->total_valid_node_count + count;
755 -
756 -       if (valid_block_count > sbi->user_block_count) {
696 +     valid_block_count = sbi->total_valid_block_count + 1;
697 +     if (unlikely(valid_block_count > sbi->user_block_count)) {
757 698         spin_unlock(&sbi->stat_lock);
758 699         return false;
759 700     }
760 701
761 -       if (valid_node_count > sbi->total_node_count) {
702 +     valid_node_count = sbi->total_valid_node_count + 1;
703 +     if (unlikely(valid_node_count > sbi->total_node_count)) {
762 704         spin_unlock(&sbi->stat_lock);
763 705         return false;
764 706     }
765 707
766 708     if (inode)
767 -           inode->i_blocks += count;
768 -       sbi->total_valid_node_count = valid_node_count;
769 -       sbi->total_valid_block_count = valid_block_count;
709 +         inode->i_blocks++;
710 +
711 +     sbi->alloc_valid_block_count++;
712 +     sbi->total_valid_node_count++;
713 +     sbi->total_valid_block_count++;
770 714     spin_unlock(&sbi->stat_lock);
771 715
772 716     return true;
773 717 }
774 718
775 719 static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
776 -                       struct inode *inode,
777 -                       unsigned int count)
720 +                     struct inode *inode)
778 721 {
779 722     spin_lock(&sbi->stat_lock);
780 723
781 -       f2fs_bug_on(sbi->total_valid_block_count < count);
782 -       f2fs_bug_on(sbi->total_valid_node_count < count);
783 -       f2fs_bug_on(inode->i_blocks < count);
724 +     f2fs_bug_on(!sbi->total_valid_block_count);
725 +     f2fs_bug_on(!sbi->total_valid_node_count);
726 +     f2fs_bug_on(!inode->i_blocks);
784 727
785 -       inode->i_blocks -= count;
786 -       sbi->total_valid_node_count -= count;
787 -       sbi->total_valid_block_count -= (block_t)count;
728 +     inode->i_blocks--;
729 +     sbi->total_valid_node_count--;
730 +     sbi->total_valid_block_count--;
788 731
789 732     spin_unlock(&sbi->stat_lock);
790 733 }
···
804 751     spin_unlock(&sbi->stat_lock);
805 752 }
806 753
807 -   static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
754 + static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
808 755 {
809 756     spin_lock(&sbi->stat_lock);
810 757     f2fs_bug_on(!sbi->total_valid_inode_count);
811 758     sbi->total_valid_inode_count--;
812 759     spin_unlock(&sbi->stat_lock);
813 -       return 0;
814 760 }
815 761
816 762 static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
···
823 771
824 772 static inline void f2fs_put_page(struct page *page, int unlock)
825 773 {
826 -       if (!page || IS_ERR(page))
774 +     if (!page)
827 775 return; 828 776 829 777 if (unlock) { ··· 928 876 FI_NO_ALLOC, /* should not allocate any blocks */ 929 877 FI_UPDATE_DIR, /* should update inode block for consistency */ 930 878 FI_DELAY_IPUT, /* used for the recovery */ 879 + FI_NO_EXTENT, /* not to use the extent cache */ 931 880 FI_INLINE_XATTR, /* used for inline xattr */ 881 + FI_INLINE_DATA, /* used for inline data*/ 932 882 }; 933 883 934 884 static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) ··· 968 914 { 969 915 if (ri->i_inline & F2FS_INLINE_XATTR) 970 916 set_inode_flag(fi, FI_INLINE_XATTR); 917 + if (ri->i_inline & F2FS_INLINE_DATA) 918 + set_inode_flag(fi, FI_INLINE_DATA); 971 919 } 972 920 973 921 static inline void set_raw_inline(struct f2fs_inode_info *fi, ··· 979 923 980 924 if (is_inode_flag_set(fi, FI_INLINE_XATTR)) 981 925 ri->i_inline |= F2FS_INLINE_XATTR; 926 + if (is_inode_flag_set(fi, FI_INLINE_DATA)) 927 + ri->i_inline |= F2FS_INLINE_DATA; 982 928 } 983 929 984 930 static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) ··· 1006 948 return 0; 1007 949 } 1008 950 951 + static inline int f2fs_has_inline_data(struct inode *inode) 952 + { 953 + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); 954 + } 955 + 956 + static inline void *inline_data_addr(struct page *page) 957 + { 958 + struct f2fs_inode *ri; 959 + ri = (struct f2fs_inode *)page_address(page); 960 + return (void *)&(ri->i_addr[1]); 961 + } 962 + 1009 963 static inline int f2fs_readonly(struct super_block *sb) 1010 964 { 1011 965 return sb->s_flags & MS_RDONLY; ··· 1028 958 */ 1029 959 int f2fs_sync_file(struct file *, loff_t, loff_t, int); 1030 960 void truncate_data_blocks(struct dnode_of_data *); 961 + int truncate_blocks(struct inode *, u64); 1031 962 void f2fs_truncate(struct inode *); 1032 963 int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 1033 964 int f2fs_setattr(struct dentry *, struct iattr *); ··· 1098 1027 int truncate_inode_blocks(struct inode *, 
pgoff_t); 1099 1028 int truncate_xattr_node(struct inode *, struct page *); 1100 1029 int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1101 - int remove_inode_page(struct inode *); 1030 + void remove_inode_page(struct inode *); 1102 1031 struct page *new_inode_page(struct inode *, const struct qstr *); 1103 1032 struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1104 1033 void ra_node_page(struct f2fs_sb_info *, nid_t); ··· 1130 1059 int npages_for_summary_flush(struct f2fs_sb_info *); 1131 1060 void allocate_new_segments(struct f2fs_sb_info *); 1132 1061 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1133 - struct bio *f2fs_bio_alloc(struct block_device *, int); 1134 - void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); 1135 - void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); 1136 1062 void write_meta_page(struct f2fs_sb_info *, struct page *); 1137 - void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, 1138 - block_t, block_t *); 1139 - void write_data_page(struct inode *, struct page *, struct dnode_of_data*, 1140 - block_t, block_t *); 1141 - void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); 1063 + void write_node_page(struct f2fs_sb_info *, struct page *, 1064 + struct f2fs_io_info *, unsigned int, block_t, block_t *); 1065 + void write_data_page(struct page *, struct dnode_of_data *, block_t *, 1066 + struct f2fs_io_info *); 1067 + void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); 1142 1068 void recover_data_page(struct f2fs_sb_info *, struct page *, 1143 1069 struct f2fs_summary *, block_t, block_t); 1144 1070 void rewrite_node_page(struct f2fs_sb_info *, struct page *, 1145 1071 struct f2fs_summary *, block_t, block_t); 1072 + void allocate_data_block(struct f2fs_sb_info *, struct page *, 1073 + block_t, block_t *, struct f2fs_summary *, int); 1074 + void f2fs_wait_on_page_writeback(struct page *, 
enum page_type); 1146 1075 void write_data_summaries(struct f2fs_sb_info *, block_t); 1147 1076 void write_node_summaries(struct f2fs_sb_info *, block_t); 1148 1077 int lookup_journal_in_cursum(struct f2fs_summary_block *, ··· 1150 1079 void flush_sit_entries(struct f2fs_sb_info *); 1151 1080 int build_segment_manager(struct f2fs_sb_info *); 1152 1081 void destroy_segment_manager(struct f2fs_sb_info *); 1082 + int __init create_segment_manager_caches(void); 1083 + void destroy_segment_manager_caches(void); 1153 1084 1154 1085 /* 1155 1086 * checkpoint.c ··· 1163 1090 void release_orphan_inode(struct f2fs_sb_info *); 1164 1091 void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1165 1092 void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1166 - int recover_orphan_inodes(struct f2fs_sb_info *); 1093 + void recover_orphan_inodes(struct f2fs_sb_info *); 1167 1094 int get_valid_checkpoint(struct f2fs_sb_info *); 1168 1095 void set_dirty_dir_page(struct inode *, struct page *); 1169 1096 void add_dirty_dir_inode(struct inode *); ··· 1178 1105 /* 1179 1106 * data.c 1180 1107 */ 1108 + void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); 1109 + int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); 1110 + void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, 1111 + struct f2fs_io_info *); 1181 1112 int reserve_new_block(struct dnode_of_data *); 1113 + int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1182 1114 void update_extent_cache(block_t, struct dnode_of_data *); 1183 1115 struct page *find_data_page(struct inode *, pgoff_t, bool); 1184 1116 struct page *get_lock_data_page(struct inode *, pgoff_t); 1185 1117 struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1186 - int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); 1187 - int do_write_data_page(struct page *); 1118 + int do_write_data_page(struct page *, struct f2fs_io_info *); 1188 1119 1189 1120 
/* 1190 1121 * gc.c ··· 1221 1144 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1222 1145 int nats, sits, fnids; 1223 1146 int total_count, utilization; 1224 - int bg_gc; 1147 + int bg_gc, inline_inode; 1225 1148 unsigned int valid_count, valid_node_count, valid_inode_count; 1226 1149 unsigned int bimodal, avg_vblocks; 1227 1150 int util_free, util_valid, util_invalid; ··· 1241 1164 1242 1165 static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) 1243 1166 { 1244 - return (struct f2fs_stat_info*)sbi->stat_info; 1167 + return (struct f2fs_stat_info *)sbi->stat_info; 1245 1168 } 1246 1169 1247 1170 #define stat_inc_call_count(si) ((si)->call_count++) ··· 1250 1173 #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) 1251 1174 #define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) 1252 1175 #define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) 1176 + #define stat_inc_inline_inode(inode) \ 1177 + do { \ 1178 + if (f2fs_has_inline_data(inode)) \ 1179 + ((F2FS_SB(inode->i_sb))->inline_inode++); \ 1180 + } while (0) 1181 + #define stat_dec_inline_inode(inode) \ 1182 + do { \ 1183 + if (f2fs_has_inline_data(inode)) \ 1184 + ((F2FS_SB(inode->i_sb))->inline_inode--); \ 1185 + } while (0) 1186 + 1253 1187 #define stat_inc_seg_type(sbi, curseg) \ 1254 1188 ((sbi)->segment_count[(curseg)->alloc_type]++) 1255 1189 #define stat_inc_block_count(sbi, curseg) \ ··· 1304 1216 #define stat_dec_dirty_dir(sbi) 1305 1217 #define stat_inc_total_hit(sb) 1306 1218 #define stat_inc_read_hit(sb) 1219 + #define stat_inc_inline_inode(inode) 1220 + #define stat_dec_inline_inode(inode) 1307 1221 #define stat_inc_seg_type(sbi, curseg) 1308 1222 #define stat_inc_block_count(sbi, curseg) 1309 1223 #define stat_inc_seg_count(si, type) ··· 1328 1238 extern const struct inode_operations f2fs_dir_inode_operations; 1329 1239 extern const struct inode_operations f2fs_symlink_inode_operations; 1330 1240 extern const struct inode_operations 
f2fs_special_inode_operations; 1241 + 1242 + /* 1243 + * inline.c 1244 + */ 1245 + bool f2fs_may_inline(struct inode *); 1246 + int f2fs_read_inline_data(struct inode *, struct page *); 1247 + int f2fs_convert_inline_data(struct inode *, pgoff_t); 1248 + int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); 1249 + int recover_inline_data(struct inode *, struct page *); 1331 1250 #endif
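The new `inline_data_addr()` helper in f2fs.h returns `&ri->i_addr[1]`, deliberately skipping slot 0. A minimal userspace sketch of that layout choice, with a stand-in struct and an assumed array size (not the real kernel constants):

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical stand-in for the on-disk f2fs inode layout: inline data
 * starts at i_addr[1], leaving i_addr[0] untouched so the conversion
 * path can later reserve a real block without clobbering the inline
 * bytes. ADDRS_PER_INODE_SKETCH is an assumed value for illustration. */
#define ADDRS_PER_INODE_SKETCH 923

struct raw_inode_sketch {
	uint32_t i_addr[ADDRS_PER_INODE_SKETCH];
};

static void *inline_data_addr_sketch(struct raw_inode_sketch *ri)
{
	return (void *)&ri->i_addr[1];	/* slot 0 stays reserved */
}
```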
+33 -51
fs/f2fs/file.c
··· 33 33 struct page *page = vmf->page; 34 34 struct inode *inode = file_inode(vma->vm_file); 35 35 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 36 - block_t old_blk_addr; 37 36 struct dnode_of_data dn; 38 37 int err; 39 38 ··· 43 44 /* block allocation */ 44 45 f2fs_lock_op(sbi); 45 46 set_new_dnode(&dn, inode, NULL, NULL, 0); 46 - err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); 47 - if (err) { 48 - f2fs_unlock_op(sbi); 49 - goto out; 50 - } 51 - 52 - old_blk_addr = dn.data_blkaddr; 53 - 54 - if (old_blk_addr == NULL_ADDR) { 55 - err = reserve_new_block(&dn); 56 - if (err) { 57 - f2fs_put_dnode(&dn); 58 - f2fs_unlock_op(sbi); 59 - goto out; 60 - } 61 - } 62 - f2fs_put_dnode(&dn); 47 + err = f2fs_reserve_block(&dn, page->index); 63 48 f2fs_unlock_op(sbi); 49 + if (err) 50 + goto out; 64 51 65 52 file_update_time(vma->vm_file); 66 53 lock_page(page); 67 - if (page->mapping != inode->i_mapping || 54 + if (unlikely(page->mapping != inode->i_mapping || 68 55 page_offset(page) > i_size_read(inode) || 69 - !PageUptodate(page)) { 56 + !PageUptodate(page))) { 70 57 unlock_page(page); 71 58 err = -EFAULT; 72 59 goto out; ··· 115 130 int ret = 0; 116 131 bool need_cp = false; 117 132 struct writeback_control wbc = { 118 - .sync_mode = WB_SYNC_ALL, 133 + .sync_mode = WB_SYNC_NONE, 119 134 .nr_to_write = LONG_MAX, 120 135 .for_reclaim = 0, 121 136 }; 122 137 123 - if (f2fs_readonly(inode->i_sb)) 138 + if (unlikely(f2fs_readonly(inode->i_sb))) 124 139 return 0; 125 140 126 141 trace_f2fs_sync_file_enter(inode); ··· 202 217 raw_node = F2FS_NODE(dn->node_page); 203 218 addr = blkaddr_in_node(raw_node) + ofs; 204 219 205 - for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { 220 + for (; count > 0; count--, addr++, dn->ofs_in_node++) { 206 221 block_t blkaddr = le32_to_cpu(*addr); 207 222 if (blkaddr == NULL_ADDR) 208 223 continue; ··· 241 256 return; 242 257 243 258 lock_page(page); 244 - if (page->mapping != inode->i_mapping) { 259 + if (unlikely(page->mapping != 
inode->i_mapping)) { 245 260 f2fs_put_page(page, 1); 246 261 return; 247 262 } ··· 251 266 f2fs_put_page(page, 1); 252 267 } 253 268 254 - static int truncate_blocks(struct inode *inode, u64 from) 269 + int truncate_blocks(struct inode *inode, u64 from) 255 270 { 256 271 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 257 272 unsigned int blocksize = inode->i_sb->s_blocksize; 258 273 struct dnode_of_data dn; 259 274 pgoff_t free_from; 260 - int count = 0; 261 - int err; 275 + int count = 0, err = 0; 262 276 263 277 trace_f2fs_truncate_blocks_enter(inode, from); 278 + 279 + if (f2fs_has_inline_data(inode)) 280 + goto done; 264 281 265 282 free_from = (pgoff_t) 266 283 ((from + blocksize - 1) >> (sbi->log_blocksize)); 267 284 268 285 f2fs_lock_op(sbi); 286 + 269 287 set_new_dnode(&dn, inode, NULL, NULL, 0); 270 288 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 271 289 if (err) { ··· 296 308 free_next: 297 309 err = truncate_inode_blocks(inode, free_from); 298 310 f2fs_unlock_op(sbi); 299 - 311 + done: 300 312 /* lastly zero out the first data page */ 301 313 truncate_partial_data_page(inode, from); 302 314 ··· 370 382 371 383 if ((attr->ia_valid & ATTR_SIZE) && 372 384 attr->ia_size != i_size_read(inode)) { 385 + err = f2fs_convert_inline_data(inode, attr->ia_size); 386 + if (err) 387 + return err; 388 + 373 389 truncate_setsize(inode, attr->ia_size); 374 390 f2fs_truncate(inode); 375 391 f2fs_balance_fs(F2FS_SB(inode->i_sb)); ··· 451 459 return 0; 452 460 } 453 461 454 - static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) 462 + static int punch_hole(struct inode *inode, loff_t offset, loff_t len) 455 463 { 456 464 pgoff_t pg_start, pg_end; 457 465 loff_t off_start, off_end; 458 466 int ret = 0; 467 + 468 + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); 469 + if (ret) 470 + return ret; 459 471 460 472 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; 461 473 pg_end = ((unsigned long long) offset + len) >> 
PAGE_CACHE_SHIFT; ··· 495 499 } 496 500 } 497 501 498 - if (!(mode & FALLOC_FL_KEEP_SIZE) && 499 - i_size_read(inode) <= (offset + len)) { 500 - i_size_write(inode, offset); 501 - mark_inode_dirty(inode); 502 - } 503 - 504 502 return ret; 505 503 } 506 504 ··· 511 521 if (ret) 512 522 return ret; 513 523 524 + ret = f2fs_convert_inline_data(inode, offset + len); 525 + if (ret) 526 + return ret; 527 + 514 528 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; 515 529 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; 516 530 ··· 526 532 527 533 f2fs_lock_op(sbi); 528 534 set_new_dnode(&dn, inode, NULL, NULL, 0); 529 - ret = get_dnode_of_data(&dn, index, ALLOC_NODE); 530 - if (ret) { 531 - f2fs_unlock_op(sbi); 532 - break; 533 - } 534 - 535 - if (dn.data_blkaddr == NULL_ADDR) { 536 - ret = reserve_new_block(&dn); 537 - if (ret) { 538 - f2fs_put_dnode(&dn); 539 - f2fs_unlock_op(sbi); 540 - break; 541 - } 542 - } 543 - f2fs_put_dnode(&dn); 535 + ret = f2fs_reserve_block(&dn, index); 544 536 f2fs_unlock_op(sbi); 537 + if (ret) 538 + break; 545 539 546 540 if (pg_start == pg_end) 547 541 new_size = offset + len; ··· 560 578 return -EOPNOTSUPP; 561 579 562 580 if (mode & FALLOC_FL_PUNCH_HOLE) 563 - ret = punch_hole(inode, offset, len, mode); 581 + ret = punch_hole(inode, offset, len); 564 582 else 565 583 ret = expand_inode_data(inode, offset, len, mode); 566 584
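Several hunks in file.c collapse the repeated "get dnode, reserve a block if unallocated, put dnode, unlock on every failure branch" sequence into a single `f2fs_reserve_block()` call with one error path. A stub-only sketch of that consolidation (none of these functions are the kernel API):

```c
#include <assert.h>

/* Sketch of the file.c refactor: one helper, one error path. The stubs
 * below stand in for get_dnode_of_data()/reserve_new_block()/
 * f2fs_put_dnode(); they only model success and a visible side effect. */
#define NULL_ADDR 0U

static unsigned int blkaddr;	/* pretend on-disk address */
static int reservations;	/* observable side effect for testing */

static int get_dnode_stub(void)		{ return 0; }	/* 0 == success */
static int reserve_new_block_stub(void)	{ blkaddr = 100; reservations++; return 0; }
static void put_dnode_stub(void)	{ }

static int reserve_block_sketch(void)
{
	int err = get_dnode_stub();

	if (err)
		return err;
	if (blkaddr == NULL_ADDR)
		err = reserve_new_block_stub();
	put_dnode_stub();
	return err;
}
```

With this shape, both callers in the diff (the page-fault path and `expand_inode_data()`) shrink to "reserve, unlock, bail on error".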
+12 -10
fs/f2fs/gc.c
··· 119 119 kfree(gc_th); 120 120 sbi->gc_thread = NULL; 121 121 } 122 - 123 122 out: 124 123 return err; 125 124 } ··· 163 164 p->ofs_unit = sbi->segs_per_sec; 164 165 } 165 166 166 - if (p->max_search > MAX_VICTIM_SEARCH) 167 - p->max_search = MAX_VICTIM_SEARCH; 167 + if (p->max_search > sbi->max_victim_search) 168 + p->max_search = sbi->max_victim_search; 168 169 169 170 p->offset = sbi->last_victim[p->gc_mode]; 170 171 } ··· 428 429 429 430 /* set page dirty and write it */ 430 431 if (gc_type == FG_GC) { 431 - f2fs_wait_on_page_writeback(node_page, NODE, true); 432 + f2fs_wait_on_page_writeback(node_page, NODE); 432 433 set_page_dirty(node_page); 433 434 } else { 434 435 if (!PageWriteback(node_page)) ··· 520 521 521 522 static void move_data_page(struct inode *inode, struct page *page, int gc_type) 522 523 { 524 + struct f2fs_io_info fio = { 525 + .type = DATA, 526 + .rw = WRITE_SYNC, 527 + }; 528 + 523 529 if (gc_type == BG_GC) { 524 530 if (PageWriteback(page)) 525 531 goto out; ··· 533 529 } else { 534 530 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 535 531 536 - f2fs_wait_on_page_writeback(page, DATA, true); 532 + f2fs_wait_on_page_writeback(page, DATA); 537 533 538 534 if (clear_page_dirty_for_io(page) && 539 535 S_ISDIR(inode->i_mode)) { ··· 541 537 inode_dec_dirty_dents(inode); 542 538 } 543 539 set_cold_data(page); 544 - do_write_data_page(page); 540 + do_write_data_page(page, &fio); 545 541 clear_cold_data(page); 546 542 } 547 543 out: ··· 635 631 goto next_step; 636 632 637 633 if (gc_type == FG_GC) { 638 - f2fs_submit_bio(sbi, DATA, true); 634 + f2fs_submit_merged_bio(sbi, DATA, WRITE); 639 635 640 636 /* 641 637 * In the case of FG_GC, it'd be better to reclaim this victim ··· 668 664 669 665 /* read segment summary of victim */ 670 666 sum_page = get_sum_page(sbi, segno); 671 - if (IS_ERR(sum_page)) 672 - return; 673 667 674 668 blk_start_plug(&plug); 675 669 ··· 699 697 700 698 INIT_LIST_HEAD(&ilist); 701 699 gc_more: 702 - if 
(!(sbi->sb->s_flags & MS_ACTIVE)) 700 + if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 703 701 goto stop; 704 702 705 703 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
+1 -1
fs/f2fs/gc.h
··· 20 20 #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 21 21 22 22 /* Search max. number of dirty segments to select a victim segment */ 23 - #define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 23 + #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 24 24 25 25 struct f2fs_gc_kthread { 26 26 struct task_struct *f2fs_gc_task;
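The gc.h rename (`MAX_VICTIM_SEARCH` becoming `DEF_MAX_VICTIM_SEARCH`) pairs with the gc.c change: the victim-search depth is now clamped against a per-filesystem field settable through sysfs rather than a compile-time constant. A minimal sketch of that clamp, with stand-in names:

```c
#include <assert.h>

/* Runtime-tunable victim search cap, replacing the old compile-time
 * limit. The struct field mimics sbi->max_victim_search; the rest is
 * illustrative only. */
#define DEF_MAX_VICTIM_SEARCH 4096	/* covers 8GB, per the header */

struct sbi_sketch {
	unsigned int max_victim_search;
};

static unsigned int victim_search_limit(const struct sbi_sketch *sbi,
					unsigned int wanted)
{
	return wanted > sbi->max_victim_search ? sbi->max_victim_search : wanted;
}
```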
+222
fs/f2fs/inline.c
··· 1 + /* 2 + * fs/f2fs/inline.c 3 + * Copyright (c) 2013, Intel Corporation 4 + * Authors: Huajun Li <huajun.li@intel.com> 5 + * Haicheng Li <haicheng.li@intel.com> 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + 11 + #include <linux/fs.h> 12 + #include <linux/f2fs_fs.h> 13 + 14 + #include "f2fs.h" 15 + 16 + bool f2fs_may_inline(struct inode *inode) 17 + { 18 + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 19 + block_t nr_blocks; 20 + loff_t i_size; 21 + 22 + if (!test_opt(sbi, INLINE_DATA)) 23 + return false; 24 + 25 + nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; 26 + if (inode->i_blocks > nr_blocks) 27 + return false; 28 + 29 + i_size = i_size_read(inode); 30 + if (i_size > MAX_INLINE_DATA) 31 + return false; 32 + 33 + return true; 34 + } 35 + 36 + int f2fs_read_inline_data(struct inode *inode, struct page *page) 37 + { 38 + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 39 + struct page *ipage; 40 + void *src_addr, *dst_addr; 41 + 42 + if (page->index) { 43 + zero_user_segment(page, 0, PAGE_CACHE_SIZE); 44 + goto out; 45 + } 46 + 47 + ipage = get_node_page(sbi, inode->i_ino); 48 + if (IS_ERR(ipage)) 49 + return PTR_ERR(ipage); 50 + 51 + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 52 + 53 + /* Copy the whole inline data block */ 54 + src_addr = inline_data_addr(ipage); 55 + dst_addr = kmap(page); 56 + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); 57 + kunmap(page); 58 + f2fs_put_page(ipage, 1); 59 + 60 + out: 61 + SetPageUptodate(page); 62 + unlock_page(page); 63 + 64 + return 0; 65 + } 66 + 67 + static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) 68 + { 69 + int err; 70 + struct page *ipage; 71 + struct dnode_of_data dn; 72 + void *src_addr, *dst_addr; 73 + block_t new_blk_addr; 74 + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 75 + struct f2fs_io_info 
fio = { 76 + .type = DATA, 77 + .rw = WRITE_SYNC | REQ_PRIO, 78 + }; 79 + 80 + f2fs_lock_op(sbi); 81 + ipage = get_node_page(sbi, inode->i_ino); 82 + if (IS_ERR(ipage)) 83 + return PTR_ERR(ipage); 84 + 85 + /* 86 + * i_addr[0] is not used for inline data, 87 + * so reserving new block will not destroy inline data 88 + */ 89 + set_new_dnode(&dn, inode, ipage, NULL, 0); 90 + err = f2fs_reserve_block(&dn, 0); 91 + if (err) { 92 + f2fs_unlock_op(sbi); 93 + return err; 94 + } 95 + 96 + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 97 + 98 + /* Copy the whole inline data block */ 99 + src_addr = inline_data_addr(ipage); 100 + dst_addr = kmap(page); 101 + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); 102 + kunmap(page); 103 + SetPageUptodate(page); 104 + 105 + /* write data page to try to make data consistent */ 106 + set_page_writeback(page); 107 + write_data_page(page, &dn, &new_blk_addr, &fio); 108 + update_extent_cache(new_blk_addr, &dn); 109 + f2fs_wait_on_page_writeback(page, DATA); 110 + 111 + /* clear inline data and flag after data writeback */ 112 + zero_user_segment(ipage, INLINE_DATA_OFFSET, 113 + INLINE_DATA_OFFSET + MAX_INLINE_DATA); 114 + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 115 + stat_dec_inline_inode(inode); 116 + 117 + sync_inode_page(&dn); 118 + f2fs_put_dnode(&dn); 119 + f2fs_unlock_op(sbi); 120 + return err; 121 + } 122 + 123 + int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) 124 + { 125 + struct page *page; 126 + int err; 127 + 128 + if (!f2fs_has_inline_data(inode)) 129 + return 0; 130 + else if (to_size <= MAX_INLINE_DATA) 131 + return 0; 132 + 133 + page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); 134 + if (!page) 135 + return -ENOMEM; 136 + 137 + err = __f2fs_convert_inline_data(inode, page); 138 + f2fs_put_page(page, 1); 139 + return err; 140 + } 141 + 142 + int f2fs_write_inline_data(struct inode *inode, 143 + struct page *page, unsigned size) 144 + { 145 + void *src_addr, 
*dst_addr; 146 + struct page *ipage; 147 + struct dnode_of_data dn; 148 + int err; 149 + 150 + set_new_dnode(&dn, inode, NULL, NULL, 0); 151 + err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); 152 + if (err) 153 + return err; 154 + ipage = dn.inode_page; 155 + 156 + zero_user_segment(ipage, INLINE_DATA_OFFSET, 157 + INLINE_DATA_OFFSET + MAX_INLINE_DATA); 158 + src_addr = kmap(page); 159 + dst_addr = inline_data_addr(ipage); 160 + memcpy(dst_addr, src_addr, size); 161 + kunmap(page); 162 + 163 + /* Release the first data block if it is allocated */ 164 + if (!f2fs_has_inline_data(inode)) { 165 + truncate_data_blocks_range(&dn, 1); 166 + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 167 + stat_inc_inline_inode(inode); 168 + } 169 + 170 + sync_inode_page(&dn); 171 + f2fs_put_dnode(&dn); 172 + 173 + return 0; 174 + } 175 + 176 + int recover_inline_data(struct inode *inode, struct page *npage) 177 + { 178 + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 179 + struct f2fs_inode *ri = NULL; 180 + void *src_addr, *dst_addr; 181 + struct page *ipage; 182 + 183 + /* 184 + * The inline_data recovery policy is as follows. 185 + * [prev.] 
[next] of inline_data flag 186 + * o o -> recover inline_data 187 + * o x -> remove inline_data, and then recover data blocks 188 + * x o -> remove inline_data, and then recover inline_data 189 + * x x -> recover data blocks 190 + */ 191 + if (IS_INODE(npage)) 192 + ri = F2FS_INODE(npage); 193 + 194 + if (f2fs_has_inline_data(inode) && 195 + ri && ri->i_inline & F2FS_INLINE_DATA) { 196 + process_inline: 197 + ipage = get_node_page(sbi, inode->i_ino); 198 + f2fs_bug_on(IS_ERR(ipage)); 199 + 200 + src_addr = inline_data_addr(npage); 201 + dst_addr = inline_data_addr(ipage); 202 + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); 203 + update_inode(inode, ipage); 204 + f2fs_put_page(ipage, 1); 205 + return -1; 206 + } 207 + 208 + if (f2fs_has_inline_data(inode)) { 209 + ipage = get_node_page(sbi, inode->i_ino); 210 + f2fs_bug_on(IS_ERR(ipage)); 211 + zero_user_segment(ipage, INLINE_DATA_OFFSET, 212 + INLINE_DATA_OFFSET + MAX_INLINE_DATA); 213 + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 214 + update_inode(inode, ipage); 215 + f2fs_put_page(ipage, 1); 216 + } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { 217 + truncate_blocks(inode, 0); 218 + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 219 + goto process_inline; 220 + } 221 + return 0; 222 + }
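The policy in the new `f2fs_may_inline()` can be summarized as: the mount option must be enabled, the inode must own no data blocks beyond the inode itself (plus an optional xattr node), and the file size must fit the inline area. A self-contained sketch of that predicate; the capacity constant is an assumed stand-in, not the real `MAX_INLINE_DATA`:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Sketch of the f2fs_may_inline() eligibility check from inline.c.
 * All types and MAX_INLINE_SKETCH are illustrative stand-ins. */
#define MAX_INLINE_SKETCH 3488

struct inode_sketch {
	bool     inline_data_opt;	/* mount option enabled? */
	bool     has_xattr_node;	/* i_xattr_nid set? */
	uint64_t i_blocks;
	int64_t  i_size;
};

static bool may_inline_sketch(const struct inode_sketch *inode)
{
	/* inode block + optional xattr node block */
	uint64_t nr_blocks = inode->has_xattr_node ? 3 : 2;

	if (!inode->inline_data_opt)
		return false;
	if (inode->i_blocks > nr_blocks)
		return false;
	return inode->i_size <= MAX_INLINE_SKETCH;
}
```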
+12 -11
fs/f2fs/inode.c
··· 42 42 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 43 43 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 44 44 if (ri->i_addr[0]) 45 - inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); 45 + inode->i_rdev = 46 + old_decode_dev(le32_to_cpu(ri->i_addr[0])); 46 47 else 47 - inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); 48 + inode->i_rdev = 49 + new_decode_dev(le32_to_cpu(ri->i_addr[1])); 48 50 } 49 51 } 50 52 ··· 54 52 { 55 53 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 56 54 if (old_valid_dev(inode->i_rdev)) { 57 - ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); 55 + ri->i_addr[0] = 56 + cpu_to_le32(old_encode_dev(inode->i_rdev)); 58 57 ri->i_addr[1] = 0; 59 58 } else { 60 59 ri->i_addr[0] = 0; 61 - ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); 60 + ri->i_addr[1] = 61 + cpu_to_le32(new_encode_dev(inode->i_rdev)); 62 62 ri->i_addr[2] = 0; 63 63 } 64 64 } ··· 71 67 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 72 68 struct f2fs_inode_info *fi = F2FS_I(inode); 73 69 struct page *node_page; 74 - struct f2fs_node *rn; 75 70 struct f2fs_inode *ri; 76 71 77 72 /* Check if ino is within scope */ ··· 84 81 if (IS_ERR(node_page)) 85 82 return PTR_ERR(node_page); 86 83 87 - rn = F2FS_NODE(node_page); 88 - ri = &(rn->i); 84 + ri = F2FS_INODE(node_page); 89 85 90 86 inode->i_mode = le16_to_cpu(ri->i_mode); 91 87 i_uid_write(inode, le32_to_cpu(ri->i_uid)); ··· 177 175 178 176 void update_inode(struct inode *inode, struct page *node_page) 179 177 { 180 - struct f2fs_node *rn; 181 178 struct f2fs_inode *ri; 182 179 183 - f2fs_wait_on_page_writeback(node_page, NODE, false); 180 + f2fs_wait_on_page_writeback(node_page, NODE); 184 181 185 - rn = F2FS_NODE(node_page); 186 - ri = &(rn->i); 182 + ri = F2FS_INODE(node_page); 187 183 188 184 ri->i_mode = cpu_to_le16(inode->i_mode); 189 185 ri->i_advise = F2FS_I(inode)->i_advise; ··· 281 281 282 282 f2fs_lock_op(sbi); 283 283 remove_inode_page(inode); 284 + 
stat_dec_inline_inode(inode); 284 285 f2fs_unlock_op(sbi); 285 286 286 287 sb_end_intwrite(inode->i_sb);
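The inode.c cleanup replaces the two-step `rn = F2FS_NODE(page); ri = &rn->i;` pattern with a single `F2FS_INODE()`-style accessor at every call site. The idea, sketched with stand-in types:

```c
#include <assert.h>

/* One accessor instead of fetching the node wrapper and taking &rn->i
 * by hand. Struct names here are illustrative, not the kernel's. */
struct raw_inode { unsigned short i_mode; };
struct raw_node  { struct raw_inode i; };

#define RAW_INODE(node) (&(node)->i)
```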
+5
fs/f2fs/namei.c
··· 424 424 } 425 425 426 426 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 427 + F2FS_I(old_inode)->i_pino = new_dir->i_ino; 427 428 428 429 new_inode->i_ctime = CURRENT_TIME; 429 430 if (old_dir_entry) 430 431 drop_nlink(new_inode); 431 432 drop_nlink(new_inode); 433 + mark_inode_dirty(new_inode); 432 434 433 435 if (!new_inode->i_nlink) 434 436 add_orphan_inode(sbi, new_inode->i_ino); ··· 459 457 if (old_dir != new_dir) { 460 458 f2fs_set_link(old_inode, old_dir_entry, 461 459 old_dir_page, new_dir); 460 + F2FS_I(old_inode)->i_pino = new_dir->i_ino; 461 + update_inode_page(old_inode); 462 462 } else { 463 463 kunmap(old_dir_page); 464 464 f2fs_put_page(old_dir_page, 0); 465 465 } 466 466 drop_nlink(old_dir); 467 + mark_inode_dirty(old_dir); 467 468 update_inode_page(old_dir); 468 469 } 469 470
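Both namei.c hunks add the same fix: when rename moves an inode into a different directory, the moved inode's `i_pino` is updated to the new parent's ino (and the inode marked dirty) so later fsync recovery resolves the correct parent. A trivial sketch of the bookkeeping, with stand-in types:

```c
#include <assert.h>
#include <stdint.h>

/* Keep the child's recorded parent ino in sync across rename.
 * Illustrative structs only. */
struct inode_sketch {
	uint32_t i_ino;
	uint32_t i_pino;	/* parent inode number, used by recovery */
};

static void rename_fixup_pino(struct inode_sketch *old_inode,
			      const struct inode_sketch *new_dir)
{
	old_inode->i_pino = new_dir->i_ino;
}
```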
+157 -115
fs/f2fs/node.c
··· 87 87 */ 88 88 static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) 89 89 { 90 - struct address_space *mapping = sbi->meta_inode->i_mapping; 90 + struct address_space *mapping = META_MAPPING(sbi); 91 91 struct f2fs_nm_info *nm_i = NM_I(sbi); 92 - struct blk_plug plug; 93 92 struct page *page; 94 93 pgoff_t index; 95 94 int i; 95 + struct f2fs_io_info fio = { 96 + .type = META, 97 + .rw = READ_SYNC | REQ_META | REQ_PRIO 98 + }; 96 99 97 - blk_start_plug(&plug); 98 100 99 101 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { 100 - if (nid >= nm_i->max_nid) 102 + if (unlikely(nid >= nm_i->max_nid)) 101 103 nid = 0; 102 104 index = current_nat_addr(sbi, nid); 103 105 ··· 107 105 if (!page) 108 106 continue; 109 107 if (PageUptodate(page)) { 108 + mark_page_accessed(page); 110 109 f2fs_put_page(page, 1); 111 110 continue; 112 111 } 113 - if (f2fs_readpage(sbi, page, index, READ)) 114 - continue; 115 - 112 + f2fs_submit_page_mbio(sbi, page, index, &fio); 113 + mark_page_accessed(page); 116 114 f2fs_put_page(page, 0); 117 115 } 118 - blk_finish_plug(&plug); 116 + f2fs_submit_merged_bio(sbi, META, READ); 119 117 } 120 118 121 119 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) ··· 393 391 394 392 /* 395 393 * Caller should call f2fs_put_dnode(dn). 396 - * Also, it should grab and release a mutex by calling mutex_lock_op() and 397 - * mutex_unlock_op() only if ro is not set RDONLY_NODE. 394 + * Also, it should grab and release a rwsem by calling f2fs_lock_op() and 395 + * f2fs_unlock_op() only if ro is not set RDONLY_NODE. 398 396 * In the case of RDONLY_NODE, we don't need to care about mutex. 
399 397 */ 400 398 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) ··· 504 502 505 503 /* Deallocate node address */ 506 504 invalidate_blocks(sbi, ni.blk_addr); 507 - dec_valid_node_count(sbi, dn->inode, 1); 505 + dec_valid_node_count(sbi, dn->inode); 508 506 set_node_addr(sbi, &ni, NULL_ADDR); 509 507 510 508 if (dn->nid == dn->inode->i_ino) { ··· 518 516 F2FS_SET_SB_DIRT(sbi); 519 517 520 518 f2fs_put_page(dn->node_page, 1); 519 + 520 + invalidate_mapping_pages(NODE_MAPPING(sbi), 521 + dn->node_page->index, dn->node_page->index); 522 + 521 523 dn->node_page = NULL; 522 524 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); 523 525 } ··· 637 631 return 0; 638 632 639 633 /* get indirect nodes in the path */ 640 - for (i = 0; i < depth - 1; i++) { 634 + for (i = 0; i < idx + 1; i++) { 641 635 /* refernece count'll be increased */ 642 636 pages[i] = get_node_page(sbi, nid[i]); 643 637 if (IS_ERR(pages[i])) { 644 - depth = i + 1; 645 638 err = PTR_ERR(pages[i]); 639 + idx = i - 1; 646 640 goto fail; 647 641 } 648 642 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 649 643 } 650 644 651 645 /* free direct nodes linked to a partial indirect node */ 652 - for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { 646 + for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 653 647 child_nid = get_nid(pages[idx], i, false); 654 648 if (!child_nid) 655 649 continue; ··· 660 654 set_nid(pages[idx], i, 0, false); 661 655 } 662 656 663 - if (offset[depth - 1] == 0) { 657 + if (offset[idx + 1] == 0) { 664 658 dn->node_page = pages[idx]; 665 659 dn->nid = nid[idx]; 666 660 truncate_node(dn); ··· 668 662 f2fs_put_page(pages[idx], 1); 669 663 } 670 664 offset[idx]++; 671 - offset[depth - 1] = 0; 665 + offset[idx + 1] = 0; 666 + idx--; 672 667 fail: 673 - for (i = depth - 3; i >= 0; i--) 668 + for (i = idx; i >= 0; i--) 674 669 f2fs_put_page(pages[i], 1); 675 670 676 671 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); ··· 685 678 
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-	struct address_space *node_mapping = sbi->node_inode->i_mapping;
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs = 0;
-	struct f2fs_node *rn;
+	struct f2fs_inode *ri;
	struct dnode_of_data dn;
	struct page *page;
···
	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

-	rn = F2FS_NODE(page);
+	ri = F2FS_INODE(page);
	switch (level) {
	case 0:
	case 1:
···
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
-		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
···
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
-		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
+		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
···

skip_partial:
	while (cont) {
-		dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
+		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
···
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
-				rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
+				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
-			if (page->mapping != node_mapping) {
+			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
				f2fs_put_page(page, 1);
				goto restart;
			}
			wait_on_page_writeback(page);
-			rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
+			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
···
	set_new_dnode(&dn, inode, page, npage, nid);

	if (page)
-		dn.inode_page_locked = 1;
+		dn.inode_page_locked = true;
	truncate_node(&dn);
	return 0;
}

/*
- * Caller should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op().
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
 */
-int remove_inode_page(struct inode *inode)
+void remove_inode_page(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	nid_t ino = inode->i_ino;
	struct dnode_of_data dn;
-	int err;

	page = get_node_page(sbi, ino);
	if (IS_ERR(page))
-		return PTR_ERR(page);
+		return;

-	err = truncate_xattr_node(inode, page);
-	if (err) {
+	if (truncate_xattr_node(inode, page)) {
		f2fs_put_page(page, 1);
-		return err;
+		return;
	}
-
	/* 0 is possible, after f2fs_new_inode() is failed */
	f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
	set_new_dnode(&dn, inode, page, page, ino);
	truncate_node(&dn);
-	return 0;
}

struct page *new_inode_page(struct inode *inode, const struct qstr *name)
···
			unsigned int ofs, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
-	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct node_info old_ni, new_ni;
	struct page *page;
	int err;

-	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return ERR_PTR(-EPERM);

-	page = grab_cache_page(mapping, dn->nid);
+	page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

-	if (!inc_valid_node_count(sbi, dn->inode, 1)) {
+	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
		err = -ENOSPC;
		goto fail;
	}
···
 * LOCKED_PAGE: f2fs_put_page(page, 1)
 * error: nothing
 */
-static int read_node_page(struct page *page, int type)
+static int read_node_page(struct page *page, int rw)
{
	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
	struct node_info ni;

	get_node_info(sbi, page->index, &ni);

-	if (ni.blk_addr == NULL_ADDR) {
+	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		f2fs_put_page(page, 1);
		return -ENOENT;
	}
···
	if (PageUptodate(page))
		return LOCKED_PAGE;

-	return f2fs_readpage(sbi, page, ni.blk_addr, type);
+	return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
}

/*
···
 */
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
-	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct page *apage;
	int err;

-	apage = find_get_page(mapping, nid);
+	apage = find_get_page(NODE_MAPPING(sbi), nid);
	if (apage && PageUptodate(apage)) {
		f2fs_put_page(apage, 0);
		return;
	}
	f2fs_put_page(apage, 0);

-	apage = grab_cache_page(mapping, nid);
+	apage = grab_cache_page(NODE_MAPPING(sbi), nid);
	if (!apage)
		return;
···

struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
-	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct page *page;
	int err;
repeat:
-	page = grab_cache_page(mapping, nid);
+	page = grab_cache_page(NODE_MAPPING(sbi), nid);
	if (!page)
		return ERR_PTR(-ENOMEM);
···
		goto got_it;

	lock_page(page);
-	if (!PageUptodate(page)) {
+	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
-	if (page->mapping != mapping) {
+	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
···
struct page *get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
-	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct blk_plug plug;
	struct page *page;
	int err, i, end;
···
	if (!nid)
		return ERR_PTR(-ENOENT);
repeat:
-	page = grab_cache_page(mapping, nid);
+	page = grab_cache_page(NODE_MAPPING(sbi), nid);
	if (!page)
		return ERR_PTR(-ENOMEM);
···
	blk_finish_plug(&plug);

	lock_page(page);
-	if (page->mapping != mapping) {
+	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
page_hit:
-	if (!PageUptodate(page)) {
+	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
···
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
			struct writeback_control *wbc)
{
-	struct address_space *mapping = sbi->node_inode->i_mapping;
	pgoff_t index, end;
	struct pagevec pvec;
	int step = ino ? 2 : 0;
···
	while (index <= end) {
		int i, nr_pages;
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
···
			else if (!trylock_page(page))
				continue;

-			if (unlikely(page->mapping != mapping)) {
+			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
···
				set_fsync_mark(page, 0);
				set_dentry_mark(page, 0);
			}
-			mapping->a_ops->writepage(page, wbc);
+			NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
			wrote++;

			if (--wbc->nr_to_write == 0)
···
	}

	if (wrote)
-		f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
-
+		f2fs_submit_merged_bio(sbi, NODE, WRITE);
	return nwritten;
}

int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
-	struct address_space *mapping = sbi->node_inode->i_mapping;
	pgoff_t index = 0, end = LONG_MAX;
	struct pagevec pvec;
-	int nr_pages;
	int ret2 = 0, ret = 0;

	pagevec_init(&pvec, 0);
-	while ((index <= end) &&
-		(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_WRITEBACK,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
-		unsigned i;
+
+	while (index <= end) {
+		int i, nr_pages;
+		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+				PAGECACHE_TAG_WRITEBACK,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/* until radix tree lookup accepts end_index */
-			if (page->index > end)
+			if (unlikely(page->index > end))
				continue;

			if (ino && ino_of_node(page) == ino) {
···
		cond_resched();
	}

-	if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+	if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
		ret2 = -ENOSPC;
-	if (test_and_clear_bit(AS_EIO, &mapping->flags))
+	if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
		ret2 = -EIO;
	if (!ret)
		ret = ret2;
···
	nid_t nid;
	block_t new_addr;
	struct node_info ni;
+	struct f2fs_io_info fio = {
+		.type = NODE,
+		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+	};

-	if (sbi->por_doing)
+	if (unlikely(sbi->por_doing))
		goto redirty_out;

	wait_on_page_writeback(page);
···
	get_node_info(sbi, nid, &ni);

	/* This page is already truncated */
-	if (ni.blk_addr == NULL_ADDR) {
+	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		unlock_page(page);
		return 0;
···

	mutex_lock(&sbi->node_write);
	set_page_writeback(page);
-	write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
+	write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
	set_node_addr(sbi, &ni, new_addr);
	dec_page_count(sbi, F2FS_DIRTY_NODES);
	mutex_unlock(&sbi->node_write);
···

	/* if mounting is failed, skip writing node pages */
	wbc->nr_to_write = 3 * max_hw_blocks(sbi);
+	wbc->sync_mode = WB_SYNC_NONE;
	sync_node_pages(sbi, 0, wbc);
	wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
						wbc->nr_to_write);
···
		return -1;

	/* 0 nid should not be used */
-	if (nid == 0)
+	if (unlikely(nid == 0))
		return 0;

	if (build) {
···
	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {

-		if (start_nid >= nm_i->max_nid)
+		if (unlikely(start_nid >= nm_i->max_nid))
			break;

		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
···
	f2fs_put_page(page, 1);

	nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
-	if (nid >= nm_i->max_nid)
+	if (unlikely(nid >= nm_i->max_nid))
		nid = 0;

	if (i++ == FREE_NID_PAGES)
···
	struct free_nid *i = NULL;
	struct list_head *this;
retry:
-	if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
+	if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
		return false;

	spin_lock(&nm_i->free_nid_list_lock);
···

int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
-	struct address_space *mapping = sbi->node_inode->i_mapping;
-	struct f2fs_node *src, *dst;
+	struct f2fs_inode *src, *dst;
	nid_t ino = ino_of_node(page);
	struct node_info old_ni, new_ni;
	struct page *ipage;

-	ipage = grab_cache_page(mapping, ino);
+	ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
	if (!ipage)
		return -ENOMEM;
···
	SetPageUptodate(ipage);
	fill_node_footer(ipage, ino, ino, 0, true);

-	src = F2FS_NODE(page);
-	dst = F2FS_NODE(ipage);
+	src = F2FS_INODE(page);
+	dst = F2FS_INODE(ipage);

-	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
-	dst->i.i_size = 0;
-	dst->i.i_blocks = cpu_to_le64(1);
-	dst->i.i_links = cpu_to_le32(1);
-	dst->i.i_xattr_nid = 0;
+	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
+	dst->i_size = 0;
+	dst->i_blocks = cpu_to_le64(1);
+	dst->i_links = cpu_to_le32(1);
+	dst->i_xattr_nid = 0;

	new_ni = old_ni;
	new_ni.ino = ino;

-	if (!inc_valid_node_count(sbi, NULL, 1))
+	if (unlikely(!inc_valid_node_count(sbi, NULL)))
		WARN_ON(1);
	set_node_addr(sbi, &new_ni, NEW_ADDR);
	inc_valid_inode_count(sbi);
	f2fs_put_page(ipage, 1);
	return 0;
}

+/*
+ * ra_sum_pages() merges contiguous pages into one bio and submits it.
+ * These pre-read pages are linked in the pages list.
+ */
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
+				int start, int nrpages)
+{
+	struct page *page;
+	int page_idx = start;
+	struct f2fs_io_info fio = {
+		.type = META,
+		.rw = READ_SYNC | REQ_META | REQ_PRIO
+	};
+
+	for (; page_idx < start + nrpages; page_idx++) {
+		/* alloc temporary page to read node summary info */
+		page = alloc_page(GFP_F2FS_ZERO);
+		if (!page) {
+			struct page *tmp;
+			list_for_each_entry_safe(page, tmp, pages, lru) {
+				list_del(&page->lru);
+				unlock_page(page);
+				__free_pages(page, 0);
+			}
+			return -ENOMEM;
+		}
+
+		lock_page(page);
+		page->index = page_idx;
+		list_add_tail(&page->lru, pages);
+	}
+
+	list_for_each_entry(page, pages, lru)
+		f2fs_submit_page_mbio(sbi, page, page->index, &fio);
+
+	f2fs_submit_merged_bio(sbi, META, READ);
+	return 0;
+}
···
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
-	struct page *page;
+	struct page *page, *tmp;
	block_t addr;
-	int i, last_offset;
-
-	/* alloc temporal page for read node */
-	page = alloc_page(GFP_NOFS | __GFP_ZERO);
-	if (!page)
-		return -ENOMEM;
-	lock_page(page);
+	int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+	int i, last_offset, nrpages, err = 0;
+	LIST_HEAD(page_list);

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

-	for (i = 0; i < last_offset; i++, sum_entry++) {
-		/*
-		 * In order to read next node page,
-		 * we must clear PageUptodate flag.
-		 */
-		ClearPageUptodate(page);
+	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
+		nrpages = min(last_offset - i, bio_blocks);

-		if (f2fs_readpage(sbi, page, addr, READ_SYNC))
-			goto out;
+		/* read ahead node pages */
+		err = ra_sum_pages(sbi, &page_list, addr, nrpages);
+		if (err)
+			return err;

-		lock_page(page);
-		rn = F2FS_NODE(page);
-		sum_entry->nid = rn->footer.nid;
-		sum_entry->version = 0;
-		sum_entry->ofs_in_node = 0;
-		addr++;
+		list_for_each_entry_safe(page, tmp, &page_list, lru) {
+
+			lock_page(page);
+			if (unlikely(!PageUptodate(page))) {
+				err = -EIO;
+			} else {
+				rn = F2FS_NODE(page);
+				sum_entry->nid = rn->footer.nid;
+				sum_entry->version = 0;
+				sum_entry->ofs_in_node = 0;
+				sum_entry++;
+			}
+
+			list_del(&page->lru);
+			unlock_page(page);
+			__free_pages(page, 0);
+		}
	}
-	unlock_page(page);
-out:
-	__free_pages(page, 0);
-	return 0;
+	return err;
}

static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+7 -1
fs/f2fs/node.h
···
 *            |            `- direct node (5 + N => 5 + 2N - 1)
 *            `- double indirect node (5 + 2N)
 *                        `- indirect node (6 + 2N)
- *                                    `- direct node (x(N + 1))
+ *                                    `- direct node
+ *                        ......
+ *                        `- indirect node ((6 + 2N) + x(N + 1))
+ *                                    `- direct node
+ *                        ......
+ *                        `- indirect node ((6 + 2N) + (N - 1)(N + 1))
+ *                                    `- direct node
 */
static inline bool IS_DNODE(struct page *node_page)
{
+29 -20
fs/f2fs/recovery.c
···
static int recover_dentry(struct page *ipage, struct inode *inode)
{
-	struct f2fs_node *raw_node = F2FS_NODE(ipage);
-	struct f2fs_inode *raw_inode = &(raw_node->i);
+	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct qstr name;
···
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
+
+	if (unlikely(name.len > F2FS_NAME_LEN)) {
+		WARN_ON(1);
+		err = -ENAMETOOLONG;
+		goto out;
+	}
retry:
	de = f2fs_find_entry(dir, &name, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino))
···
	kunmap(page);
	f2fs_put_page(page, 0);
out:
-	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
-			"ino = %x, name = %s, dir = %lx, err = %d",
-			ino_of_node(ipage), raw_inode->i_name,
+	f2fs_msg(inode->i_sb, KERN_NOTICE,
+			"%s: ino = %x, name = %s, dir = %lx, err = %d",
+			__func__, ino_of_node(ipage), raw_inode->i_name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

static int recover_inode(struct inode *inode, struct page *node_page)
{
-	struct f2fs_node *raw_node = F2FS_NODE(node_page);
-	struct f2fs_inode *raw_inode = &(raw_node->i);
+	struct f2fs_inode *raw_inode = F2FS_INODE(node_page);

	if (!IS_INODE(node_page))
		return 0;
···
	while (1) {
		struct fsync_inode_entry *entry;

-		err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
+		err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
		if (err)
-			goto out;
+			return err;

		lock_page(page);
···
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
	}
+
	unlock_page(page);
-out:
	__free_pages(page, 0);
+
	return err;
}
···
	struct node_info ni;
	int err = 0, recovered = 0;

+	if (recover_inline_data(inode, page))
+		goto out;
+
	start = start_bidx_of_node(ofs_of_node(page), fi);
	if (IS_INODE(page))
		end = start + ADDRS_PER_INODE(fi);
···
		end = start + ADDRS_PER_BLOCK;

	f2fs_lock_op(sbi);
+
	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		f2fs_unlock_op(sbi);
-		return err;
+		goto out;
	}

	wait_on_page_writeback(dn.node_page);
···
err:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);
-
-	f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
-			"recovered_data = %d blocks, err = %d",
-			inode->i_ino, recovered, err);
+out:
+	f2fs_msg(sbi->sb, KERN_NOTICE,
+		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
+		inode->i_ino, recovered, err);
	return err;
}
···
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/* read node page */
-	page = alloc_page(GFP_NOFS | __GFP_ZERO);
+	page = alloc_page(GFP_F2FS_ZERO);
	if (!page)
		return -ENOMEM;
···
	while (1) {
		struct fsync_inode_entry *entry;

-		err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
+		err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
		if (err)
-			goto out;
+			return err;

		lock_page(page);
···
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
	}
+
	unlock_page(page);
-out:
	__free_pages(page, 0);

	if (!err)
···

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry), NULL);
-	if (unlikely(!fsync_entry_slab))
+	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
+380 -204
fs/f2fs/segment.c
···
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/vmalloc.h>
+#include <linux/swap.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include <trace/events/f2fs.h>

+#define __reverse_ffz(x) __reverse_ffs(~(x))
+
+static struct kmem_cache *discard_entry_slab;
+
+/*
+ * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
+ * MSB and LSB are reversed in a byte by f2fs_set_bit.
+ */
+static inline unsigned long __reverse_ffs(unsigned long word)
+{
+	int num = 0;
+
+#if BITS_PER_LONG == 64
+	if ((word & 0xffffffff) == 0) {
+		num += 32;
+		word >>= 32;
+	}
+#endif
+	if ((word & 0xffff) == 0) {
+		num += 16;
+		word >>= 16;
+	}
+	if ((word & 0xff) == 0) {
+		num += 8;
+		word >>= 8;
+	}
+	if ((word & 0xf0) == 0)
+		num += 4;
+	else
+		word >>= 4;
+	if ((word & 0xc) == 0)
+		num += 2;
+	else
+		word >>= 2;
+	if ((word & 0x2) == 0)
+		num += 1;
+	return num;
+}
+
+/*
+ * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
+ * f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * Example:
+ *                             LSB <--> MSB
+ *   f2fs_set_bit(0, bitmap) => 0000 0001
+ *   f2fs_set_bit(7, bitmap) => 1000 0000
+ */
+static unsigned long __find_rev_next_bit(const unsigned long *addr,
+			unsigned long size, unsigned long offset)
+{
+	const unsigned long *p = addr + BIT_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG - 1);
+	unsigned long tmp;
+	unsigned long mask, submask;
+	unsigned long quot, rest;
+
+	if (offset >= size)
+		return size;
+
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (!offset)
+		goto aligned;
+
+	tmp = *(p++);
+	quot = (offset >> 3) << 3;
+	rest = offset & 0x7;
+	mask = ~0UL << quot;
+	submask = (unsigned char)(0xff << rest) >> rest;
+	submask <<= quot;
+	mask &= submask;
+	tmp &= mask;
+	if (size < BITS_PER_LONG)
+		goto found_first;
+	if (tmp)
+		goto found_middle;
+
+	size -= BITS_PER_LONG;
+	result += BITS_PER_LONG;
+aligned:
+	while (size & ~(BITS_PER_LONG-1)) {
+		tmp = *(p++);
+		if (tmp)
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+found_first:
+	tmp &= (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + __reverse_ffs(tmp);
+}
+
+static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+			unsigned long size, unsigned long offset)
+{
+	const unsigned long *p = addr + BIT_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG - 1);
+	unsigned long tmp;
+	unsigned long mask, submask;
+	unsigned long quot, rest;
+
+	if (offset >= size)
+		return size;
+
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (!offset)
+		goto aligned;
+
+	tmp = *(p++);
+	quot = (offset >> 3) << 3;
+	rest = offset & 0x7;
+	mask = ~(~0UL << quot);
+	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
+	submask <<= quot;
+	mask += submask;
+	tmp |= mask;
+	if (size < BITS_PER_LONG)
+		goto found_first;
+	if (~tmp)
+		goto found_middle;
+
+	size -= BITS_PER_LONG;
+	result += BITS_PER_LONG;
+aligned:
+	while (size & ~(BITS_PER_LONG - 1)) {
+		tmp = *(p++);
+		if (~tmp)
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp |= ~0UL << size;
+	if (tmp == ~0UL)	/* Are any bits zero? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + __reverse_ffz(tmp);
+}

/*
 * This function balances dirty node and dentry pages.
···
	mutex_unlock(&dirty_i->seglist_lock);
}

+static void f2fs_issue_discard(struct f2fs_sb_info *sbi,
+				block_t blkstart, block_t blklen)
+{
+	sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
+	sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
+	blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi,
+			unsigned int segno, struct seg_entry *se)
+{
+	struct list_head *head = &SM_I(sbi)->discard_list;
+	struct discard_entry *new;
+	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+	int max_blocks = sbi->blocks_per_seg;
+	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+	unsigned long dmap[entries];
+	unsigned int start = 0, end = -1;
+	int i;
+
+	if (!test_opt(sbi, DISCARD))
+		return;
+
+	/* zero block will be discarded through the prefree list */
+	if (!se->valid_blocks || se->valid_blocks == max_blocks)
+		return;
+
+	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
+	for (i = 0; i < entries; i++)
+		dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+
+	while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
+		if (start >= max_blocks)
+			break;
+
+		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
+
+		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+		INIT_LIST_HEAD(&new->list);
+		new->blkaddr = START_BLOCK(sbi, segno) + start;
+		new->len = end - start;
+
+		list_add_tail(&new->list, head);
+		SM_I(sbi)->nr_discards += end - start;
+	}
+}
+
/*
 * Should call clear_prefree_segments after checkpoint is done.
 */
···
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
+	struct list_head *head = &(SM_I(sbi)->discard_list);
+	struct list_head *this, *next;
+	struct discard_entry *entry;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int total_segs = TOTAL_SEGS(sbi);
···
		if (!test_opt(sbi, DISCARD))
			continue;

-		blkdev_issue_discard(sbi->sb->s_bdev,
-				START_BLOCK(sbi, start) <<
-				sbi->log_sectors_per_block,
-				(1 << (sbi->log_sectors_per_block +
-				sbi->log_blocks_per_seg)) * (end - start),
-				GFP_NOFS, 0);
+		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
+				(end - start) << sbi->log_blocks_per_seg);
	}
	mutex_unlock(&dirty_i->seglist_lock);
+
+	/* send small discards */
+	list_for_each_safe(this, next, head) {
+		entry = list_entry(this, struct discard_entry, list);
+		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
+		list_del(&entry->list);
+		SM_I(sbi)->nr_discards -= entry->len;
+		kmem_cache_free(discard_entry_slab, entry);
+	}
}

static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
···
		struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
-	block_t ofs;
-	for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
-		if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
-			&& !f2fs_test_bit(ofs, se->cur_valid_map))
-			break;
-	}
-	seg->next_blkoff = ofs;
+	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+	unsigned long target_map[entries];
+	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+	int i, pos;
+
+	for (i = 0; i < entries; i++)
+		target_map[i] = ckpt_map[i] | cur_map[i];
+
+	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+
+	seg->next_blkoff = pos;
}

/*
···
	.allocate_segment = allocate_segment_by_default,
};

-static void f2fs_end_io_write(struct bio *bio, int err)
-{
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct bio_private *p = bio->bi_private;
-
-	do {
-		struct page *page = bvec->bv_page;
-
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-		if (!uptodate) {
-			SetPageError(page);
-			if (page->mapping)
-				set_bit(AS_EIO, &page->mapping->flags);
-			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
-			p->sbi->sb->s_flags |= MS_RDONLY;
-		}
-		end_page_writeback(page);
-		dec_page_count(p->sbi, F2FS_WRITEBACK);
-	} while (bvec >= bio->bi_io_vec);
-
-	if (p->is_sync)
-		complete(p->wait);
-
-	if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
-			!list_empty(&p->sbi->cp_wait.task_list))
-		wake_up(&p->sbi->cp_wait);
-
-	kfree(p);
-	bio_put(bio);
-}
-
-struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
-{
-	struct bio *bio;
-
-	/* No failure on bio allocation */
-	bio = bio_alloc(GFP_NOIO, npages);
-	bio->bi_bdev = bdev;
-	bio->bi_private = NULL;
-
-	return bio;
-}
-
-static void do_submit_bio(struct f2fs_sb_info *sbi,
-				enum page_type type, bool sync)
-{
-	int rw = sync ? WRITE_SYNC : WRITE;
-	enum page_type btype = type > META ? META : type;
-
-	if (type >= META_FLUSH)
-		rw = WRITE_FLUSH_FUA;
-
-	if (btype == META)
-		rw |= REQ_META;
-
-	if (sbi->bio[btype]) {
-		struct bio_private *p = sbi->bio[btype]->bi_private;
-		p->sbi = sbi;
-		sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
-
-		trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]);
-
-		if (type == META_FLUSH) {
-			DECLARE_COMPLETION_ONSTACK(wait);
-			p->is_sync = true;
-			p->wait = &wait;
-			submit_bio(rw, sbi->bio[btype]);
-			wait_for_completion(&wait);
-		} else {
-			p->is_sync = false;
-			submit_bio(rw, sbi->bio[btype]);
-		}
-		sbi->bio[btype] = NULL;
-	}
-}
-
-void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
-{
-	down_write(&sbi->bio_sem);
-	do_submit_bio(sbi, type, sync);
-	up_write(&sbi->bio_sem);
-}
-
-static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
-				block_t blk_addr, enum page_type type)
-{
-	struct block_device *bdev = sbi->sb->s_bdev;
-	int bio_blocks;
-
-	verify_block_addr(sbi, blk_addr);
-
-	down_write(&sbi->bio_sem);
-
-	inc_page_count(sbi, F2FS_WRITEBACK);
-
-	if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
-		do_submit_bio(sbi, type, false);
-alloc_new:
-	if (sbi->bio[type] == NULL) {
-		struct bio_private *priv;
-retry:
-		priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
-		if (!priv) {
-			cond_resched();
-			goto retry;
-		}
-
-		bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
-		sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
-		sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
-		sbi->bio[type]->bi_private = priv;
-		/*
-		 * The end_io will be assigned at the submission phase.
-		 * Until then, let bio_add_page() merge consecutive IOs as much
-		 * as possible.
-		 */
-	}
-
-	if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
-					PAGE_CACHE_SIZE) {
-		do_submit_bio(sbi, type, false);
-		goto alloc_new;
-	}
-
-	sbi->last_block_in_bio[type] = blk_addr;
-
-	up_write(&sbi->bio_sem);
-	trace_f2fs_submit_write_page(page, blk_addr, type);
-}
-
-void f2fs_wait_on_page_writeback(struct page *page,
-				enum page_type type, bool sync)
-{
-	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
-	if (PageWriteback(page)) {
-		f2fs_submit_bio(sbi, type, sync);
-		wait_on_page_writeback(page);
-	}
-}
-
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
···
	return __get_segment_type_6(page, p_type);
}

-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
-			block_t old_blkaddr, block_t *new_blkaddr,
-			struct f2fs_summary *sum, enum page_type p_type)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+			block_t old_blkaddr, block_t *new_blkaddr,
+			struct f2fs_summary *sum, int type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int old_cursegno;
-	int type;

-	type = __get_segment_type(page, p_type);
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
···
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	mutex_unlock(&sit_i->sentry_lock);

-	if (p_type == NODE)
+	if (page && IS_NODESEG(type))
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
-
-	/* writeout dirty page into bdev */
-	submit_write_page(sbi, page, *new_blkaddr, p_type);

	mutex_unlock(&curseg->curseg_mutex);
}

+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+			block_t old_blkaddr, block_t *new_blkaddr,
+			struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+	int type = __get_segment_type(page, fio->type);
+
+	allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
+	/* writeout dirty page into bdev */
+	f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
+}
+
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
+	struct f2fs_io_info fio = {
+		.type = META,
+		.rw = WRITE_SYNC | REQ_META | REQ_PRIO
+	};
+
	set_page_writeback(page);
-	submit_write_page(sbi, page, page->index, META);
+	f2fs_submit_page_mbio(sbi, page, page->index, &fio);
}

void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
+		struct f2fs_io_info *fio,
		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
{
	struct f2fs_summary sum;
	set_summary(&sum, nid, 0, 0);
-	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
+	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
}

-void write_data_page(struct inode *inode, struct page *page,
-		struct dnode_of_data *dn, block_t old_blkaddr,
-		block_t *new_blkaddr)
+void write_data_page(struct page *page, struct dnode_of_data *dn,
+		block_t *new_blkaddr, struct f2fs_io_info *fio)
{
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct f2fs_summary sum;
	struct node_info ni;

-	f2fs_bug_on(old_blkaddr == NULL_ADDR);
+	f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

-	do_write_page(sbi, page, old_blkaddr,
-			new_blkaddr, &sum, DATA);
+	do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
}

-void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
-			block_t old_blk_addr)
+void rewrite_data_page(struct page *page, block_t old_blkaddr,
+			struct f2fs_io_info *fio)
{
-	submit_write_page(sbi, page, old_blk_addr, DATA);
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
}

void recover_data_page(struct f2fs_sb_info *sbi,
···
	unsigned int segno, old_cursegno;
	block_t next_blkaddr = next_blkaddr_of_node(page);
	unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
+	struct f2fs_io_info fio = {
+		.type = NODE,
+		.rw = WRITE_SYNC,
+	};

	curseg = CURSEG_I(sbi, type);
···
	/* rewrite node page */
	set_page_writeback(page);
-	submit_write_page(sbi, page, new_blkaddr, NODE);
-	f2fs_submit_bio(sbi, NODE, true);
+	f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
+	f2fs_submit_merged_bio(sbi, NODE, WRITE);
	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);

	locate_dirty_segment(sbi, old_cursegno);
···
	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_wait_on_page_writeback(struct page *page,
+				enum page_type type)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+	if (PageWriteback(page)) {
+		f2fs_submit_merged_bio(sbi, type, WRITE);
+		wait_on_page_writeback(page);
+	}
}

static int read_compacted_summaries(struct f2fs_sb_info *sbi)
···
	sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);

+	/* add discard candidates */
+	if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
+		add_discard_addrs(sbi, segno, se);
+
	if (flushed)
		goto to_sit_page;
1418 1323 ··· 1583 1480 return restore_curseg_summaries(sbi); 1584 1481 } 1585 1482 1483 + static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) 1484 + { 1485 + struct address_space *mapping = META_MAPPING(sbi); 1486 + struct page *page; 1487 + block_t blk_addr, prev_blk_addr = 0; 1488 + int sit_blk_cnt = SIT_BLK_CNT(sbi); 1489 + int blkno = start; 1490 + struct f2fs_io_info fio = { 1491 + .type = META, 1492 + .rw = READ_SYNC | REQ_META | REQ_PRIO 1493 + }; 1494 + 1495 + for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) { 1496 + 1497 + blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); 1498 + 1499 + if (blkno != start && prev_blk_addr + 1 != blk_addr) 1500 + break; 1501 + prev_blk_addr = blk_addr; 1502 + repeat: 1503 + page = grab_cache_page(mapping, blk_addr); 1504 + if (!page) { 1505 + cond_resched(); 1506 + goto repeat; 1507 + } 1508 + if (PageUptodate(page)) { 1509 + mark_page_accessed(page); 1510 + f2fs_put_page(page, 1); 1511 + continue; 1512 + } 1513 + 1514 + f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); 1515 + 1516 + mark_page_accessed(page); 1517 + f2fs_put_page(page, 0); 1518 + } 1519 + 1520 + f2fs_submit_merged_bio(sbi, META, READ); 1521 + return blkno - start; 1522 + } 1523 + 1586 1524 static void build_sit_entries(struct f2fs_sb_info *sbi) 1587 1525 { 1588 1526 struct sit_info *sit_i = SIT_I(sbi); 1589 1527 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1590 1528 struct f2fs_summary_block *sum = curseg->sum_blk; 1591 - unsigned int start; 1529 + int sit_blk_cnt = SIT_BLK_CNT(sbi); 1530 + unsigned int i, start, end; 1531 + unsigned int readed, start_blk = 0; 1532 + int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1592 1533 1593 - for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1594 - struct seg_entry *se = &sit_i->sentries[start]; 1595 - struct f2fs_sit_block *sit_blk; 1596 - struct f2fs_sit_entry sit; 1597 - struct page *page; 1598 - int i; 1534 + do { 1535 + readed = 
ra_sit_pages(sbi, start_blk, nrpages); 1599 1536 1600 - mutex_lock(&curseg->curseg_mutex); 1601 - for (i = 0; i < sits_in_cursum(sum); i++) { 1602 - if (le32_to_cpu(segno_in_journal(sum, i)) == start) { 1603 - sit = sit_in_journal(sum, i); 1604 - mutex_unlock(&curseg->curseg_mutex); 1605 - goto got_it; 1537 + start = start_blk * sit_i->sents_per_block; 1538 + end = (start_blk + readed) * sit_i->sents_per_block; 1539 + 1540 + for (; start < end && start < TOTAL_SEGS(sbi); start++) { 1541 + struct seg_entry *se = &sit_i->sentries[start]; 1542 + struct f2fs_sit_block *sit_blk; 1543 + struct f2fs_sit_entry sit; 1544 + struct page *page; 1545 + 1546 + mutex_lock(&curseg->curseg_mutex); 1547 + for (i = 0; i < sits_in_cursum(sum); i++) { 1548 + if (le32_to_cpu(segno_in_journal(sum, i)) 1549 + == start) { 1550 + sit = sit_in_journal(sum, i); 1551 + mutex_unlock(&curseg->curseg_mutex); 1552 + goto got_it; 1553 + } 1554 + } 1555 + mutex_unlock(&curseg->curseg_mutex); 1556 + 1557 + page = get_current_sit_page(sbi, start); 1558 + sit_blk = (struct f2fs_sit_block *)page_address(page); 1559 + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 1560 + f2fs_put_page(page, 1); 1561 + got_it: 1562 + check_block_count(sbi, start, &sit); 1563 + seg_info_from_raw_sit(se, &sit); 1564 + if (sbi->segs_per_sec > 1) { 1565 + struct sec_entry *e = get_sec_entry(sbi, start); 1566 + e->valid_blocks += se->valid_blocks; 1606 1567 } 1607 1568 } 1608 - mutex_unlock(&curseg->curseg_mutex); 1609 - page = get_current_sit_page(sbi, start); 1610 - sit_blk = (struct f2fs_sit_block *)page_address(page); 1611 - sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 1612 - f2fs_put_page(page, 1); 1613 - got_it: 1614 - check_block_count(sbi, start, &sit); 1615 - seg_info_from_raw_sit(se, &sit); 1616 - if (sbi->segs_per_sec > 1) { 1617 - struct sec_entry *e = get_sec_entry(sbi, start); 1618 - e->valid_blocks += se->valid_blocks; 1619 - } 1620 - } 1569 + start_blk += readed; 1570 + } while (start_blk < 
sit_blk_cnt); 1621 1571 } 1622 1572 1623 1573 static void init_free_segmap(struct f2fs_sb_info *sbi) ··· 1800 1644 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1801 1645 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1802 1646 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; 1647 + sm_info->ipu_policy = F2FS_IPU_DISABLE; 1648 + sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 1649 + 1650 + INIT_LIST_HEAD(&sm_info->discard_list); 1651 + sm_info->nr_discards = 0; 1652 + sm_info->max_discards = 0; 1803 1653 1804 1654 err = build_sit_info(sbi); 1805 1655 if (err) ··· 1921 1759 destroy_sit_info(sbi); 1922 1760 sbi->sm_info = NULL; 1923 1761 kfree(sm_info); 1762 + } 1763 + 1764 + int __init create_segment_manager_caches(void) 1765 + { 1766 + discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 1767 + sizeof(struct discard_entry), NULL); 1768 + if (!discard_entry_slab) 1769 + return -ENOMEM; 1770 + return 0; 1771 + } 1772 + 1773 + void destroy_segment_manager_caches(void) 1774 + { 1775 + kmem_cache_destroy(discard_entry_slab); 1924 1776 }
+52 -29
fs/f2fs/segment.h
··· 20 20 #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 21 21 #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) 22 22 23 - #define IS_DATASEG(t) \ 24 - ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ 25 - (t == CURSEG_WARM_DATA)) 26 - 27 - #define IS_NODESEG(t) \ 28 - ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ 29 - (t == CURSEG_WARM_NODE)) 23 + #define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) 24 + #define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) 30 25 31 26 #define IS_CURSEG(sbi, seg) \ 32 27 ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ ··· 78 83 (segno / SIT_ENTRY_PER_BLOCK) 79 84 #define START_SEGNO(sit_i, segno) \ 80 85 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) 86 + #define SIT_BLK_CNT(sbi) \ 87 + ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) 81 88 #define f2fs_bitmap_size(nr) \ 82 89 (BITS_TO_LONGS(nr) * sizeof(unsigned long)) 83 90 #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) 84 91 #define TOTAL_SECS(sbi) (sbi->total_sections) 85 92 86 93 #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ 87 - (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 94 + (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) 88 95 #define SECTOR_TO_BLOCK(sbi, sectors) \ 89 - (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 96 + (sectors >> (sbi)->log_sectors_per_block) 90 97 #define MAX_BIO_BLOCKS(max_hw_blocks) \ 91 98 (min((int)max_hw_blocks, BIO_MAX_PAGES)) 92 - 93 - /* during checkpoint, bio_private is used to synchronize the last bio */ 94 - struct bio_private { 95 - struct f2fs_sb_info *sbi; 96 - bool is_sync; 97 - void *wait; 98 - }; 99 99 100 100 /* 101 101 * indicate a block allocation direction: RIGHT and LEFT. 
··· 448 458 449 459 static inline bool need_SSR(struct f2fs_sb_info *sbi) 450 460 { 451 - return ((prefree_segments(sbi) / sbi->segs_per_sec) 452 - + free_sections(sbi) < overprovision_sections(sbi)); 461 + return (prefree_segments(sbi) / sbi->segs_per_sec) 462 + + free_sections(sbi) < overprovision_sections(sbi); 453 463 } 454 464 455 465 static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) ··· 457 467 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); 458 468 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); 459 469 460 - if (sbi->por_doing) 470 + if (unlikely(sbi->por_doing)) 461 471 return false; 462 472 463 - return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + 464 - reserved_sections(sbi))); 473 + return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + 474 + reserved_sections(sbi)); 465 475 } 466 476 467 477 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) 468 478 { 469 - return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments); 479 + return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; 470 480 } 471 481 472 482 static inline int utilization(struct f2fs_sb_info *sbi) 473 483 { 474 - return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); 484 + return div_u64((u64)valid_user_blocks(sbi) * 100, 485 + sbi->user_block_count); 475 486 } 476 487 477 488 /* 478 489 * Sometimes f2fs may be better to drop out-of-place update policy. 479 - * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write 480 - * data in the original place likewise other traditional file systems. 481 - * But, currently set 100 in percentage, which means it is disabled. 482 - * See below need_inplace_update(). 490 + * And, users can control the policy through sysfs entries. 491 + * There are five policies with triggering conditions as follows. 
492 + * F2FS_IPU_FORCE - all the time, 493 + * F2FS_IPU_SSR - if SSR mode is activated, 494 + * F2FS_IPU_UTIL - if FS utilization is over threashold, 495 + * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over 496 + * threashold, 497 + * F2FS_IPUT_DISABLE - disable IPU. (=default option) 483 498 */ 484 - #define MIN_IPU_UTIL 100 499 + #define DEF_MIN_IPU_UTIL 70 500 + 501 + enum { 502 + F2FS_IPU_FORCE, 503 + F2FS_IPU_SSR, 504 + F2FS_IPU_UTIL, 505 + F2FS_IPU_SSR_UTIL, 506 + F2FS_IPU_DISABLE, 507 + }; 508 + 485 509 static inline bool need_inplace_update(struct inode *inode) 486 510 { 487 511 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 512 + 513 + /* IPU can be done only for the user data */ 488 514 if (S_ISDIR(inode->i_mode)) 489 515 return false; 490 - if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) 516 + 517 + switch (SM_I(sbi)->ipu_policy) { 518 + case F2FS_IPU_FORCE: 491 519 return true; 520 + case F2FS_IPU_SSR: 521 + if (need_SSR(sbi)) 522 + return true; 523 + break; 524 + case F2FS_IPU_UTIL: 525 + if (utilization(sbi) > SM_I(sbi)->min_ipu_util) 526 + return true; 527 + break; 528 + case F2FS_IPU_SSR_UTIL: 529 + if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) 530 + return true; 531 + break; 532 + case F2FS_IPU_DISABLE: 533 + break; 534 + } 492 535 return false; 493 536 } 494 537
+54 -18
fs/f2fs/super.c
··· 50 50 Opt_active_logs, 51 51 Opt_disable_ext_identify, 52 52 Opt_inline_xattr, 53 + Opt_inline_data, 53 54 Opt_err, 54 55 }; 55 56 ··· 66 65 {Opt_active_logs, "active_logs=%u"}, 67 66 {Opt_disable_ext_identify, "disable_ext_identify"}, 68 67 {Opt_inline_xattr, "inline_xattr"}, 68 + {Opt_inline_data, "inline_data"}, 69 69 {Opt_err, NULL}, 70 70 }; 71 71 ··· 74 72 enum { 75 73 GC_THREAD, /* struct f2fs_gc_thread */ 76 74 SM_INFO, /* struct f2fs_sm_info */ 75 + F2FS_SBI, /* struct f2fs_sb_info */ 77 76 }; 78 77 79 78 struct f2fs_attr { ··· 92 89 return (unsigned char *)sbi->gc_thread; 93 90 else if (struct_type == SM_INFO) 94 91 return (unsigned char *)SM_I(sbi); 92 + else if (struct_type == F2FS_SBI) 93 + return (unsigned char *)sbi; 95 94 return NULL; 96 95 } 97 96 ··· 180 175 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); 181 176 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); 182 177 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); 178 + F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 179 + F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 180 + F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 181 + F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 183 182 184 183 #define ATTR_LIST(name) (&f2fs_attr_##name.attr) 185 184 static struct attribute *f2fs_attrs[] = { ··· 192 183 ATTR_LIST(gc_no_gc_sleep_time), 193 184 ATTR_LIST(gc_idle), 194 185 ATTR_LIST(reclaim_segments), 186 + ATTR_LIST(max_small_discards), 187 + ATTR_LIST(ipu_policy), 188 + ATTR_LIST(min_ipu_util), 189 + ATTR_LIST(max_victim_search), 195 190 NULL, 196 191 }; 197 192 ··· 324 311 case Opt_disable_ext_identify: 325 312 set_opt(sbi, DISABLE_EXT_IDENTIFY); 326 313 break; 314 + case Opt_inline_data: 315 + set_opt(sbi, INLINE_DATA); 316 + break; 327 317 default: 328 318 f2fs_msg(sb, KERN_ERR, 329 319 "Unrecognized mount option \"%s\" or missing value", 
··· 341 325 { 342 326 struct f2fs_inode_info *fi; 343 327 344 - fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); 328 + fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO); 345 329 if (!fi) 346 330 return NULL; 347 331 ··· 524 508 #endif 525 509 if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) 526 510 seq_puts(seq, ",disable_ext_identify"); 527 - 511 + if (test_opt(sbi, INLINE_DATA)) 512 + seq_puts(seq, ",inline_data"); 528 513 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 529 514 530 515 return 0; ··· 535 518 { 536 519 struct super_block *sb = seq->private; 537 520 struct f2fs_sb_info *sbi = F2FS_SB(sb); 538 - unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); 521 + unsigned int total_segs = 522 + le32_to_cpu(sbi->raw_super->segment_count_main); 539 523 int i; 540 524 541 525 for (i = 0; i < total_segs; i++) { ··· 636 618 struct f2fs_sb_info *sbi = F2FS_SB(sb); 637 619 struct inode *inode; 638 620 639 - if (ino < F2FS_ROOT_INO(sbi)) 621 + if (unlikely(ino < F2FS_ROOT_INO(sbi))) 640 622 return ERR_PTR(-ESTALE); 641 623 642 624 /* ··· 647 629 inode = f2fs_iget(sb, ino); 648 630 if (IS_ERR(inode)) 649 631 return ERR_CAST(inode); 650 - if (generation && inode->i_generation != generation) { 632 + if (unlikely(generation && inode->i_generation != generation)) { 651 633 /* we didn't find the right inode.. 
*/ 652 634 iput(inode); 653 635 return ERR_PTR(-ESTALE); ··· 750 732 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); 751 733 fsmeta += le32_to_cpu(raw_super->segment_count_ssa); 752 734 753 - if (fsmeta >= total) 735 + if (unlikely(fsmeta >= total)) 754 736 return 1; 755 737 756 - if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { 738 + if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 757 739 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); 758 740 return 1; 759 741 } ··· 781 763 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); 782 764 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); 783 765 sbi->cur_victim_sec = NULL_SECNO; 766 + sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; 784 767 785 768 for (i = 0; i < NR_COUNT_TYPE; i++) 786 769 atomic_set(&sbi->nr_pages[i], 0); ··· 817 798 /* sanity checking of raw super */ 818 799 if (sanity_check_raw_super(sb, *raw_super)) { 819 800 brelse(*raw_super_buf); 820 - f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " 821 - "in %dth superblock", block + 1); 822 - if(block == 0) { 801 + f2fs_msg(sb, KERN_ERR, 802 + "Can't find valid F2FS filesystem in %dth superblock", 803 + block + 1); 804 + if (block == 0) { 823 805 block++; 824 806 goto retry; 825 807 } else { ··· 838 818 struct buffer_head *raw_super_buf; 839 819 struct inode *root; 840 820 long err = -EINVAL; 821 + int i; 841 822 842 823 /* allocate memory for f2fs-specific super block info */ 843 824 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); ··· 846 825 return -ENOMEM; 847 826 848 827 /* set a block size */ 849 - if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { 828 + if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { 850 829 f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); 851 830 goto free_sbi; 852 831 } ··· 895 874 mutex_init(&sbi->node_write); 896 875 sbi->por_doing = false; 897 876 spin_lock_init(&sbi->stat_lock); 898 - init_rwsem(&sbi->bio_sem); 877 + 878 + mutex_init(&sbi->read_io.io_mutex); 879 + sbi->read_io.sbi = 
sbi; 880 + sbi->read_io.bio = NULL; 881 + for (i = 0; i < NR_PAGE_TYPE; i++) { 882 + mutex_init(&sbi->write_io[i].io_mutex); 883 + sbi->write_io[i].sbi = sbi; 884 + sbi->write_io[i].bio = NULL; 885 + } 886 + 899 887 init_rwsem(&sbi->cp_rwsem); 900 888 init_waitqueue_head(&sbi->cp_wait); 901 889 init_sb_info(sbi); ··· 969 939 } 970 940 971 941 /* if there are nt orphan nodes free them */ 972 - err = -EINVAL; 973 - if (recover_orphan_inodes(sbi)) 974 - goto free_node_inode; 942 + recover_orphan_inodes(sbi); 975 943 976 944 /* read root inode and dentry */ 977 945 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); ··· 978 950 err = PTR_ERR(root); 979 951 goto free_node_inode; 980 952 } 981 - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) 953 + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 954 + err = -EINVAL; 982 955 goto free_root_inode; 956 + } 983 957 984 958 sb->s_root = d_make_root(root); /* allocate root dentry */ 985 959 if (!sb->s_root) { ··· 1083 1053 { 1084 1054 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 1085 1055 sizeof(struct f2fs_inode_info), NULL); 1086 - if (f2fs_inode_cachep == NULL) 1056 + if (!f2fs_inode_cachep) 1087 1057 return -ENOMEM; 1088 1058 return 0; 1089 1059 } ··· 1108 1078 err = create_node_manager_caches(); 1109 1079 if (err) 1110 1080 goto free_inodecache; 1111 - err = create_gc_caches(); 1081 + err = create_segment_manager_caches(); 1112 1082 if (err) 1113 1083 goto free_node_manager_caches; 1084 + err = create_gc_caches(); 1085 + if (err) 1086 + goto free_segment_manager_caches; 1114 1087 err = create_checkpoint_caches(); 1115 1088 if (err) 1116 1089 goto free_gc_caches; ··· 1135 1102 destroy_checkpoint_caches(); 1136 1103 free_gc_caches: 1137 1104 destroy_gc_caches(); 1105 + free_segment_manager_caches: 1106 + destroy_segment_manager_caches(); 1138 1107 free_node_manager_caches: 1139 1108 destroy_node_manager_caches(); 1140 1109 free_inodecache: ··· 1152 1117 
unregister_filesystem(&f2fs_fs_type); 1153 1118 destroy_checkpoint_caches(); 1154 1119 destroy_gc_caches(); 1120 + destroy_segment_manager_caches(); 1155 1121 destroy_node_manager_caches(); 1156 1122 destroy_inodecache(); 1157 1123 kset_unregister(f2fs_kset);
+1 -1
fs/f2fs/xattr.c
··· 522 522 if (found) 523 523 free = free + ENTRY_SIZE(here); 524 524 525 - if (free < newsize) { 525 + if (unlikely(free < newsize)) { 526 526 error = -ENOSPC; 527 527 goto exit; 528 528 }
+7
include/linux/f2fs_fs.h
··· 153 153 #define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) 154 154 155 155 #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ 156 + #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ 157 + 158 + #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ 159 + F2FS_INLINE_XATTR_ADDRS - 1)) 160 + 161 + #define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \ 162 + - sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1)) 156 163 157 164 struct f2fs_inode { 158 165 __le16 i_mode; /* file mode */
+82 -25
include/trace/events/f2fs.h
··· 16 16 { META, "META" }, \ 17 17 { META_FLUSH, "META_FLUSH" }) 18 18 19 - #define show_bio_type(type) \ 20 - __print_symbolic(type, \ 21 - { READ, "READ" }, \ 22 - { READA, "READAHEAD" }, \ 23 - { READ_SYNC, "READ_SYNC" }, \ 24 - { WRITE, "WRITE" }, \ 25 - { WRITE_SYNC, "WRITE_SYNC" }, \ 26 - { WRITE_FLUSH, "WRITE_FLUSH" }, \ 27 - { WRITE_FUA, "WRITE_FUA" }) 19 + #define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA)) 20 + #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) 21 + 22 + #define show_bio_type(type) show_bio_base(type), show_bio_extra(type) 23 + 24 + #define show_bio_base(type) \ 25 + __print_symbolic(F2FS_BIO_MASK(type), \ 26 + { READ, "READ" }, \ 27 + { READA, "READAHEAD" }, \ 28 + { READ_SYNC, "READ_SYNC" }, \ 29 + { WRITE, "WRITE" }, \ 30 + { WRITE_SYNC, "WRITE_SYNC" }, \ 31 + { WRITE_FLUSH, "WRITE_FLUSH" }, \ 32 + { WRITE_FUA, "WRITE_FUA" }, \ 33 + { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) 34 + 35 + #define show_bio_extra(type) \ 36 + __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ 37 + { REQ_META, "(M)" }, \ 38 + { REQ_PRIO, "(P)" }, \ 39 + { REQ_META | REQ_PRIO, "(MP)" }, \ 40 + { 0, " \b" }) 28 41 29 42 #define show_data_type(type) \ 30 43 __print_symbolic(type, \ ··· 434 421 __entry->err) 435 422 ); 436 423 437 - TRACE_EVENT_CONDITION(f2fs_readpage, 424 + TRACE_EVENT_CONDITION(f2fs_submit_page_bio, 438 425 439 426 TP_PROTO(struct page *page, sector_t blkaddr, int type), 440 427 ··· 459 446 ), 460 447 461 448 TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " 462 - "blkaddr = 0x%llx, bio_type = %s", 449 + "blkaddr = 0x%llx, bio_type = %s%s", 463 450 show_dev_ino(__entry), 464 451 (unsigned long)__entry->index, 465 452 (unsigned long long)__entry->blkaddr, ··· 611 598 __entry->ofs_in_node) 612 599 ); 613 600 614 - TRACE_EVENT(f2fs_do_submit_bio, 601 + DECLARE_EVENT_CLASS(f2fs__submit_bio, 615 602 616 - TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), 603 + TP_PROTO(struct super_block *sb, int rw, int type, 
struct bio *bio), 617 604 618 - TP_ARGS(sb, btype, sync, bio), 605 + TP_ARGS(sb, rw, type, bio), 619 606 620 607 TP_STRUCT__entry( 621 608 __field(dev_t, dev) 622 - __field(int, btype) 623 - __field(bool, sync) 609 + __field(int, rw) 610 + __field(int, type) 624 611 __field(sector_t, sector) 625 612 __field(unsigned int, size) 626 613 ), 627 614 628 615 TP_fast_assign( 629 616 __entry->dev = sb->s_dev; 630 - __entry->btype = btype; 631 - __entry->sync = sync; 617 + __entry->rw = rw; 618 + __entry->type = type; 632 619 __entry->sector = bio->bi_sector; 633 620 __entry->size = bio->bi_size; 634 621 ), 635 622 636 - TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", 623 + TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u", 637 624 show_dev(__entry), 638 - show_block_type(__entry->btype), 639 - __entry->sync ? "sync" : "no sync", 625 + show_bio_type(__entry->rw), 626 + show_block_type(__entry->type), 640 627 (unsigned long long)__entry->sector, 641 628 __entry->size) 629 + ); 630 + 631 + DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, 632 + 633 + TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), 634 + 635 + TP_ARGS(sb, rw, type, bio), 636 + 637 + TP_CONDITION(bio) 638 + ); 639 + 640 + DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, 641 + 642 + TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), 643 + 644 + TP_ARGS(sb, rw, type, bio), 645 + 646 + TP_CONDITION(bio) 642 647 ); 643 648 644 649 DECLARE_EVENT_CLASS(f2fs__page, ··· 705 674 TP_ARGS(page, type) 706 675 ); 707 676 708 - TRACE_EVENT(f2fs_submit_write_page, 677 + TRACE_EVENT(f2fs_submit_page_mbio, 709 678 710 - TP_PROTO(struct page *page, block_t blk_addr, int type), 679 + TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), 711 680 712 - TP_ARGS(page, blk_addr, type), 681 + TP_ARGS(page, rw, type, blk_addr), 713 682 714 683 TP_STRUCT__entry( 715 684 __field(dev_t, dev) 716 685 __field(ino_t, ino) 686 + 
__field(int, rw) 717 687 __field(int, type) 718 688 __field(pgoff_t, index) 719 689 __field(block_t, block) ··· 723 691 TP_fast_assign( 724 692 __entry->dev = page->mapping->host->i_sb->s_dev; 725 693 __entry->ino = page->mapping->host->i_ino; 694 + __entry->rw = rw; 726 695 __entry->type = type; 727 696 __entry->index = page->index; 728 697 __entry->block = blk_addr; 729 698 ), 730 699 731 - TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", 700 + TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx", 732 701 show_dev_ino(__entry), 702 + show_bio_type(__entry->rw), 733 703 show_block_type(__entry->type), 734 704 (unsigned long)__entry->index, 735 705 (unsigned long long)__entry->block) ··· 761 727 __entry->msg) 762 728 ); 763 729 730 + TRACE_EVENT(f2fs_issue_discard, 731 + 732 + TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen), 733 + 734 + TP_ARGS(sb, blkstart, blklen), 735 + 736 + TP_STRUCT__entry( 737 + __field(dev_t, dev) 738 + __field(block_t, blkstart) 739 + __field(block_t, blklen) 740 + ), 741 + 742 + TP_fast_assign( 743 + __entry->dev = sb->s_dev; 744 + __entry->blkstart = blkstart; 745 + __entry->blklen = blklen; 746 + ), 747 + 748 + TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx", 749 + show_dev(__entry), 750 + (unsigned long long)__entry->blkstart, 751 + (unsigned long long)__entry->blklen) 752 + ); 764 753 #endif /* _TRACE_F2FS_H */ 765 754 766 755 /* This part must be outside protection */