Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
"The major work includes fixing and enhancing the existing extent_cache
feature, which has been settling down well so far, and it now becomes a
default mount option accordingly.

Also, this version newly registers a f2fs memory shrinker to reclaim
several objects consumed by a couple of data structures in order to
avoid memory pressures.

Another new feature is to add ioctl(F2FS_GARBAGE_COLLECT) which
triggers a cleaning job explicitly by users.

Most of the other patches are to fix bugs that occurred in corner cases
across the whole code area"

* tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (85 commits)
f2fs: upset segment_info repair
f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent
f2fs: update extent tree in batches
f2fs: fix to release inode correctly
f2fs: handle f2fs_truncate error correctly
f2fs: avoid unneeded initializing when converting inline dentry
f2fs: atomically set inode->i_flags
f2fs: fix wrong pointer access during try_to_free_nids
f2fs: use __GFP_NOFAIL to avoid infinite loop
f2fs: lookup neighbor extent nodes for merging later
f2fs: split __insert_extent_tree_ret for readability
f2fs: kill dead code in __insert_extent_tree
f2fs: adjust showing of extent cache stat
f2fs: add largest/cached stat in extent cache
f2fs: fix incorrect mapping for bmap
f2fs: add annotation for space utilization of regular/inline dentry
f2fs: fix to update cached_en of extent tree properly
f2fs: fix typo
f2fs: check the node block address of newly allocated nid
f2fs: go out for insert_inode_locked failure
...

+1904 -1027
+3 -1
Documentation/filesystems/f2fs.txt
··· 143 143 extent_cache Enable an extent cache based on rb-tree, it can cache 144 144 as many as extent which map between contiguous logical 145 145 address and physical address per inode, resulting in 146 - increasing the cache hit ratio. 146 + increasing the cache hit ratio. Set by default. 147 + noextent_cache Diable an extent cache based on rb-tree explicitly, see 148 + the above extent_cache mount option. 147 149 noinline_data Disable the inline data feature, inline data feature is 148 150 enabled by default. 149 151
+2
MAINTAINERS
··· 4416 4416 F2FS FILE SYSTEM 4417 4417 M: Jaegeuk Kim <jaegeuk@kernel.org> 4418 4418 M: Changman Lee <cm224.lee@samsung.com> 4419 + R: Chao Yu <chao2.yu@samsung.com> 4419 4420 L: linux-f2fs-devel@lists.sourceforge.net 4420 4421 W: http://en.wikipedia.org/wiki/F2FS 4421 4422 T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git ··· 4425 4424 F: Documentation/ABI/testing/sysfs-fs-f2fs 4426 4425 F: fs/f2fs/ 4427 4426 F: include/linux/f2fs_fs.h 4427 + F: include/trace/events/f2fs.h 4428 4428 4429 4429 FUJITSU FR-V (FRV) PORT 4430 4430 M: David Howells <dhowells@redhat.com>
+1 -1
fs/f2fs/Kconfig
··· 45 45 default y 46 46 help 47 47 Posix Access Control Lists (ACLs) support permissions for users and 48 - gourps beyond the owner/group/world scheme. 48 + groups beyond the owner/group/world scheme. 49 49 50 50 To learn more about Access Control Lists, visit the POSIX ACLs for 51 51 Linux website <http://acl.bestbits.at/>.
+1
fs/f2fs/Makefile
··· 2 2 3 3 f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o 4 4 f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o 5 + f2fs-y += shrinker.o extent_cache.o 5 6 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o 6 7 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o 7 8 f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
+58 -35
fs/f2fs/checkpoint.c
··· 69 69 70 70 fio.page = page; 71 71 72 - if (f2fs_submit_page_bio(&fio)) 72 + if (f2fs_submit_page_bio(&fio)) { 73 + f2fs_put_page(page, 1); 73 74 goto repeat; 75 + } 74 76 75 77 lock_page(page); 76 78 if (unlikely(page->mapping != mapping)) { 77 79 f2fs_put_page(page, 1); 78 80 goto repeat; 79 81 } 82 + 83 + /* 84 + * if there is any IO error when accessing device, make our filesystem 85 + * readonly and make sure do not write checkpoint with non-uptodate 86 + * meta page. 87 + */ 88 + if (unlikely(!PageUptodate(page))) 89 + f2fs_stop_checkpoint(sbi); 80 90 out: 81 91 return page; 82 92 } ··· 336 326 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 337 327 { 338 328 struct inode_management *im = &sbi->im[type]; 339 - struct ino_entry *e; 329 + struct ino_entry *e, *tmp; 330 + 331 + tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); 340 332 retry: 341 - if (radix_tree_preload(GFP_NOFS)) { 342 - cond_resched(); 343 - goto retry; 344 - } 333 + radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); 345 334 346 335 spin_lock(&im->ino_lock); 347 - 348 336 e = radix_tree_lookup(&im->ino_root, ino); 349 337 if (!e) { 350 - e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); 351 - if (!e) { 352 - spin_unlock(&im->ino_lock); 353 - radix_tree_preload_end(); 354 - goto retry; 355 - } 338 + e = tmp; 356 339 if (radix_tree_insert(&im->ino_root, ino, e)) { 357 340 spin_unlock(&im->ino_lock); 358 - kmem_cache_free(ino_entry_slab, e); 359 341 radix_tree_preload_end(); 360 342 goto retry; 361 343 } ··· 360 358 } 361 359 spin_unlock(&im->ino_lock); 362 360 radix_tree_preload_end(); 361 + 362 + if (e != tmp) 363 + kmem_cache_free(ino_entry_slab, tmp); 363 364 } 364 365 365 366 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) ··· 463 458 __remove_ino_entry(sbi, ino, ORPHAN_INO); 464 459 } 465 460 466 - static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 461 + static int recover_orphan_inode(struct f2fs_sb_info *sbi, 
nid_t ino) 467 462 { 468 - struct inode *inode = f2fs_iget(sbi->sb, ino); 469 - f2fs_bug_on(sbi, IS_ERR(inode)); 463 + struct inode *inode; 464 + 465 + inode = f2fs_iget(sbi->sb, ino); 466 + if (IS_ERR(inode)) { 467 + /* 468 + * there should be a bug that we can't find the entry 469 + * to orphan inode. 470 + */ 471 + f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT); 472 + return PTR_ERR(inode); 473 + } 474 + 470 475 clear_nlink(inode); 471 476 472 477 /* truncate all the data during iput */ 473 478 iput(inode); 479 + return 0; 474 480 } 475 481 476 - void recover_orphan_inodes(struct f2fs_sb_info *sbi) 482 + int recover_orphan_inodes(struct f2fs_sb_info *sbi) 477 483 { 478 484 block_t start_blk, orphan_blocks, i, j; 485 + int err; 479 486 480 487 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) 481 - return; 482 - 483 - set_sbi_flag(sbi, SBI_POR_DOING); 488 + return 0; 484 489 485 490 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); 486 491 orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); ··· 504 489 orphan_blk = (struct f2fs_orphan_block *)page_address(page); 505 490 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { 506 491 nid_t ino = le32_to_cpu(orphan_blk->ino[j]); 507 - recover_orphan_inode(sbi, ino); 492 + err = recover_orphan_inode(sbi, ino); 493 + if (err) { 494 + f2fs_put_page(page, 1); 495 + return err; 496 + } 508 497 } 509 498 f2fs_put_page(page, 1); 510 499 } 511 500 /* clear Orphan Flag */ 512 501 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); 513 - clear_sbi_flag(sbi, SBI_POR_DOING); 514 - return; 502 + return 0; 515 503 } 516 504 517 505 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) ··· 522 504 struct list_head *head; 523 505 struct f2fs_orphan_block *orphan_blk = NULL; 524 506 unsigned int nentries = 0; 525 - unsigned short index; 507 + unsigned short index = 1; 526 508 unsigned short orphan_blocks; 527 509 struct page *page = NULL; 528 510 struct ino_entry 
*orphan = NULL; 529 511 struct inode_management *im = &sbi->im[ORPHAN_INO]; 530 512 531 513 orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); 532 - 533 - for (index = 0; index < orphan_blocks; index++) 534 - grab_meta_page(sbi, start_blk + index); 535 - 536 - index = 1; 537 514 538 515 /* 539 516 * we don't need to do spin_lock(&im->ino_lock) here, since all the ··· 540 527 /* loop for each orphan inode entry and write them in Jornal block */ 541 528 list_for_each_entry(orphan, head, list) { 542 529 if (!page) { 543 - page = find_get_page(META_MAPPING(sbi), start_blk++); 544 - f2fs_bug_on(sbi, !page); 530 + page = grab_meta_page(sbi, start_blk++); 545 531 orphan_blk = 546 532 (struct f2fs_orphan_block *)page_address(page); 547 533 memset(orphan_blk, 0, sizeof(*orphan_blk)); 548 - f2fs_put_page(page, 0); 549 534 } 550 535 551 536 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); ··· 715 704 struct inode_entry *new; 716 705 int ret = 0; 717 706 718 - if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) 707 + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 708 + !S_ISLNK(inode->i_mode)) 719 709 return; 720 710 721 711 if (!S_ISDIR(inode->i_mode)) { ··· 904 892 __u32 crc32 = 0; 905 893 int i; 906 894 int cp_payload_blks = __cp_payload(sbi); 895 + block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg); 896 + bool invalidate = false; 907 897 908 898 /* 909 899 * This avoids to conduct wrong roll-forward operations and uses 910 900 * metapages, so should be called prior to sync_meta_pages below. 
911 901 */ 912 - discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); 902 + if (discard_next_dnode(sbi, discard_blk)) 903 + invalidate = true; 913 904 914 905 /* Flush all the NAT/SIT pages */ 915 906 while (get_pages(sbi, F2FS_DIRTY_META)) { ··· 1040 1025 1041 1026 /* wait for previous submitted meta pages writeback */ 1042 1027 wait_on_all_pages_writeback(sbi); 1028 + 1029 + /* 1030 + * invalidate meta page which is used temporarily for zeroing out 1031 + * block at the end of warm node chain. 1032 + */ 1033 + if (invalidate) 1034 + invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, 1035 + discard_blk); 1043 1036 1044 1037 release_dirty_inode(sbi); 1045 1038
+1 -2
fs/f2fs/crypto_key.c
··· 92 92 if (!ci) 93 93 return; 94 94 95 - if (ci->ci_keyring_key) 96 - key_put(ci->ci_keyring_key); 95 + key_put(ci->ci_keyring_key); 97 96 crypto_free_ablkcipher(ci->ci_ctfm); 98 97 kmem_cache_free(f2fs_crypt_info_cachep, ci); 99 98 }
+238 -717
fs/f2fs/data.c
··· 14 14 #include <linux/mpage.h> 15 15 #include <linux/writeback.h> 16 16 #include <linux/backing-dev.h> 17 + #include <linux/pagevec.h> 17 18 #include <linux/blkdev.h> 18 19 #include <linux/bio.h> 19 20 #include <linux/prefetch.h> ··· 26 25 #include "segment.h" 27 26 #include "trace.h" 28 27 #include <trace/events/f2fs.h> 29 - 30 - static struct kmem_cache *extent_tree_slab; 31 - static struct kmem_cache *extent_node_slab; 32 28 33 29 static void f2fs_read_end_io(struct bio *bio) 34 30 { ··· 90 92 { 91 93 struct bio *bio; 92 94 93 - /* No failure on bio allocation */ 94 - bio = bio_alloc(GFP_NOIO, npages); 95 + bio = f2fs_bio_alloc(npages); 95 96 96 97 bio->bi_bdev = sbi->sb->s_bdev; 97 98 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); ··· 155 158 156 159 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { 157 160 bio_put(bio); 158 - f2fs_put_page(page, 1); 159 161 return -EFAULT; 160 162 } 161 163 ··· 262 266 return err; 263 267 } 264 268 265 - static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, 266 - struct extent_info *ei) 269 + int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) 267 270 { 268 - struct f2fs_inode_info *fi = F2FS_I(inode); 269 - pgoff_t start_fofs, end_fofs; 270 - block_t start_blkaddr; 271 - 272 - read_lock(&fi->ext_lock); 273 - if (fi->ext.len == 0) { 274 - read_unlock(&fi->ext_lock); 275 - return false; 276 - } 277 - 278 - stat_inc_total_hit(inode->i_sb); 279 - 280 - start_fofs = fi->ext.fofs; 281 - end_fofs = fi->ext.fofs + fi->ext.len - 1; 282 - start_blkaddr = fi->ext.blk; 283 - 284 - if (pgofs >= start_fofs && pgofs <= end_fofs) { 285 - *ei = fi->ext; 286 - stat_inc_read_hit(inode->i_sb); 287 - read_unlock(&fi->ext_lock); 288 - return true; 289 - } 290 - read_unlock(&fi->ext_lock); 291 - return false; 292 - } 293 - 294 - static bool update_extent_info(struct inode *inode, pgoff_t fofs, 295 - block_t blkaddr) 296 - { 297 - struct f2fs_inode_info *fi = F2FS_I(inode); 298 - pgoff_t start_fofs, 
end_fofs; 299 - block_t start_blkaddr, end_blkaddr; 300 - int need_update = true; 301 - 302 - write_lock(&fi->ext_lock); 303 - 304 - start_fofs = fi->ext.fofs; 305 - end_fofs = fi->ext.fofs + fi->ext.len - 1; 306 - start_blkaddr = fi->ext.blk; 307 - end_blkaddr = fi->ext.blk + fi->ext.len - 1; 308 - 309 - /* Drop and initialize the matched extent */ 310 - if (fi->ext.len == 1 && fofs == start_fofs) 311 - fi->ext.len = 0; 312 - 313 - /* Initial extent */ 314 - if (fi->ext.len == 0) { 315 - if (blkaddr != NULL_ADDR) { 316 - fi->ext.fofs = fofs; 317 - fi->ext.blk = blkaddr; 318 - fi->ext.len = 1; 319 - } 320 - goto end_update; 321 - } 322 - 323 - /* Front merge */ 324 - if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { 325 - fi->ext.fofs--; 326 - fi->ext.blk--; 327 - fi->ext.len++; 328 - goto end_update; 329 - } 330 - 331 - /* Back merge */ 332 - if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { 333 - fi->ext.len++; 334 - goto end_update; 335 - } 336 - 337 - /* Split the existing extent */ 338 - if (fi->ext.len > 1 && 339 - fofs >= start_fofs && fofs <= end_fofs) { 340 - if ((end_fofs - fofs) < (fi->ext.len >> 1)) { 341 - fi->ext.len = fofs - start_fofs; 342 - } else { 343 - fi->ext.fofs = fofs + 1; 344 - fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; 345 - fi->ext.len -= fofs - start_fofs + 1; 346 - } 347 - } else { 348 - need_update = false; 349 - } 350 - 351 - /* Finally, if the extent is very fragmented, let's drop the cache. 
*/ 352 - if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { 353 - fi->ext.len = 0; 354 - set_inode_flag(fi, FI_NO_EXTENT); 355 - need_update = true; 356 - } 357 - end_update: 358 - write_unlock(&fi->ext_lock); 359 - return need_update; 360 - } 361 - 362 - static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, 363 - struct extent_tree *et, struct extent_info *ei, 364 - struct rb_node *parent, struct rb_node **p) 365 - { 366 - struct extent_node *en; 367 - 368 - en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); 369 - if (!en) 370 - return NULL; 371 - 372 - en->ei = *ei; 373 - INIT_LIST_HEAD(&en->list); 374 - 375 - rb_link_node(&en->rb_node, parent, p); 376 - rb_insert_color(&en->rb_node, &et->root); 377 - et->count++; 378 - atomic_inc(&sbi->total_ext_node); 379 - return en; 380 - } 381 - 382 - static void __detach_extent_node(struct f2fs_sb_info *sbi, 383 - struct extent_tree *et, struct extent_node *en) 384 - { 385 - rb_erase(&en->rb_node, &et->root); 386 - et->count--; 387 - atomic_dec(&sbi->total_ext_node); 388 - 389 - if (et->cached_en == en) 390 - et->cached_en = NULL; 391 - } 392 - 393 - static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, 394 - nid_t ino) 395 - { 396 - struct extent_tree *et; 397 - 398 - down_read(&sbi->extent_tree_lock); 399 - et = radix_tree_lookup(&sbi->extent_tree_root, ino); 400 - if (!et) { 401 - up_read(&sbi->extent_tree_lock); 402 - return NULL; 403 - } 404 - atomic_inc(&et->refcount); 405 - up_read(&sbi->extent_tree_lock); 406 - 407 - return et; 408 - } 409 - 410 - static struct extent_tree *__grab_extent_tree(struct inode *inode) 411 - { 412 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 413 - struct extent_tree *et; 414 - nid_t ino = inode->i_ino; 415 - 416 - down_write(&sbi->extent_tree_lock); 417 - et = radix_tree_lookup(&sbi->extent_tree_root, ino); 418 - if (!et) { 419 - et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); 420 - f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); 421 - 
memset(et, 0, sizeof(struct extent_tree)); 422 - et->ino = ino; 423 - et->root = RB_ROOT; 424 - et->cached_en = NULL; 425 - rwlock_init(&et->lock); 426 - atomic_set(&et->refcount, 0); 427 - et->count = 0; 428 - sbi->total_ext_tree++; 429 - } 430 - atomic_inc(&et->refcount); 431 - up_write(&sbi->extent_tree_lock); 432 - 433 - return et; 434 - } 435 - 436 - static struct extent_node *__lookup_extent_tree(struct extent_tree *et, 437 - unsigned int fofs) 438 - { 439 - struct rb_node *node = et->root.rb_node; 440 - struct extent_node *en; 441 - 442 - if (et->cached_en) { 443 - struct extent_info *cei = &et->cached_en->ei; 444 - 445 - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) 446 - return et->cached_en; 447 - } 448 - 449 - while (node) { 450 - en = rb_entry(node, struct extent_node, rb_node); 451 - 452 - if (fofs < en->ei.fofs) { 453 - node = node->rb_left; 454 - } else if (fofs >= en->ei.fofs + en->ei.len) { 455 - node = node->rb_right; 456 - } else { 457 - et->cached_en = en; 458 - return en; 459 - } 460 - } 461 - return NULL; 462 - } 463 - 464 - static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, 465 - struct extent_tree *et, struct extent_node *en) 466 - { 467 - struct extent_node *prev; 468 - struct rb_node *node; 469 - 470 - node = rb_prev(&en->rb_node); 471 - if (!node) 472 - return NULL; 473 - 474 - prev = rb_entry(node, struct extent_node, rb_node); 475 - if (__is_back_mergeable(&en->ei, &prev->ei)) { 476 - en->ei.fofs = prev->ei.fofs; 477 - en->ei.blk = prev->ei.blk; 478 - en->ei.len += prev->ei.len; 479 - __detach_extent_node(sbi, et, prev); 480 - return prev; 481 - } 482 - return NULL; 483 - } 484 - 485 - static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, 486 - struct extent_tree *et, struct extent_node *en) 487 - { 488 - struct extent_node *next; 489 - struct rb_node *node; 490 - 491 - node = rb_next(&en->rb_node); 492 - if (!node) 493 - return NULL; 494 - 495 - next = rb_entry(node, struct extent_node, 
rb_node); 496 - if (__is_front_mergeable(&en->ei, &next->ei)) { 497 - en->ei.len += next->ei.len; 498 - __detach_extent_node(sbi, et, next); 499 - return next; 500 - } 501 - return NULL; 502 - } 503 - 504 - static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, 505 - struct extent_tree *et, struct extent_info *ei, 506 - struct extent_node **den) 507 - { 508 - struct rb_node **p = &et->root.rb_node; 509 - struct rb_node *parent = NULL; 510 - struct extent_node *en; 511 - 512 - while (*p) { 513 - parent = *p; 514 - en = rb_entry(parent, struct extent_node, rb_node); 515 - 516 - if (ei->fofs < en->ei.fofs) { 517 - if (__is_front_mergeable(ei, &en->ei)) { 518 - f2fs_bug_on(sbi, !den); 519 - en->ei.fofs = ei->fofs; 520 - en->ei.blk = ei->blk; 521 - en->ei.len += ei->len; 522 - *den = __try_back_merge(sbi, et, en); 523 - return en; 524 - } 525 - p = &(*p)->rb_left; 526 - } else if (ei->fofs >= en->ei.fofs + en->ei.len) { 527 - if (__is_back_mergeable(ei, &en->ei)) { 528 - f2fs_bug_on(sbi, !den); 529 - en->ei.len += ei->len; 530 - *den = __try_front_merge(sbi, et, en); 531 - return en; 532 - } 533 - p = &(*p)->rb_right; 534 - } else { 535 - f2fs_bug_on(sbi, 1); 536 - } 537 - } 538 - 539 - return __attach_extent_node(sbi, et, ei, parent, p); 540 - } 541 - 542 - static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, 543 - struct extent_tree *et, bool free_all) 544 - { 545 - struct rb_node *node, *next; 546 - struct extent_node *en; 547 - unsigned int count = et->count; 548 - 549 - node = rb_first(&et->root); 550 - while (node) { 551 - next = rb_next(node); 552 - en = rb_entry(node, struct extent_node, rb_node); 553 - 554 - if (free_all) { 555 - spin_lock(&sbi->extent_lock); 556 - if (!list_empty(&en->list)) 557 - list_del_init(&en->list); 558 - spin_unlock(&sbi->extent_lock); 559 - } 560 - 561 - if (free_all || list_empty(&en->list)) { 562 - __detach_extent_node(sbi, et, en); 563 - kmem_cache_free(extent_node_slab, en); 564 - } 565 - node = 
next; 566 - } 567 - 568 - return count - et->count; 569 - } 570 - 571 - static void f2fs_init_extent_tree(struct inode *inode, 572 - struct f2fs_extent *i_ext) 573 - { 574 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 575 - struct extent_tree *et; 576 - struct extent_node *en; 577 271 struct extent_info ei; 272 + struct inode *inode = dn->inode; 578 273 579 - if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) 580 - return; 581 - 582 - et = __grab_extent_tree(inode); 583 - 584 - write_lock(&et->lock); 585 - if (et->count) 586 - goto out; 587 - 588 - set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 589 - le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); 590 - 591 - en = __insert_extent_tree(sbi, et, &ei, NULL); 592 - if (en) { 593 - et->cached_en = en; 594 - 595 - spin_lock(&sbi->extent_lock); 596 - list_add_tail(&en->list, &sbi->extent_list); 597 - spin_unlock(&sbi->extent_lock); 598 - } 599 - out: 600 - write_unlock(&et->lock); 601 - atomic_dec(&et->refcount); 602 - } 603 - 604 - static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, 605 - struct extent_info *ei) 606 - { 607 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 608 - struct extent_tree *et; 609 - struct extent_node *en; 610 - 611 - trace_f2fs_lookup_extent_tree_start(inode, pgofs); 612 - 613 - et = __find_extent_tree(sbi, inode->i_ino); 614 - if (!et) 615 - return false; 616 - 617 - read_lock(&et->lock); 618 - en = __lookup_extent_tree(et, pgofs); 619 - if (en) { 620 - *ei = en->ei; 621 - spin_lock(&sbi->extent_lock); 622 - if (!list_empty(&en->list)) 623 - list_move_tail(&en->list, &sbi->extent_list); 624 - spin_unlock(&sbi->extent_lock); 625 - stat_inc_read_hit(sbi->sb); 626 - } 627 - stat_inc_total_hit(sbi->sb); 628 - read_unlock(&et->lock); 629 - 630 - trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); 631 - 632 - atomic_dec(&et->refcount); 633 - return en ? 
true : false; 634 - } 635 - 636 - static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, 637 - block_t blkaddr) 638 - { 639 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 640 - struct extent_tree *et; 641 - struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; 642 - struct extent_node *den = NULL; 643 - struct extent_info ei, dei; 644 - unsigned int endofs; 645 - 646 - trace_f2fs_update_extent_tree(inode, fofs, blkaddr); 647 - 648 - et = __grab_extent_tree(inode); 649 - 650 - write_lock(&et->lock); 651 - 652 - /* 1. lookup and remove existing extent info in cache */ 653 - en = __lookup_extent_tree(et, fofs); 654 - if (!en) 655 - goto update_extent; 656 - 657 - dei = en->ei; 658 - __detach_extent_node(sbi, et, en); 659 - 660 - /* 2. if extent can be split more, split and insert the left part */ 661 - if (dei.len > 1) { 662 - /* insert left part of split extent into cache */ 663 - if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { 664 - set_extent_info(&ei, dei.fofs, dei.blk, 665 - fofs - dei.fofs); 666 - en1 = __insert_extent_tree(sbi, et, &ei, NULL); 667 - } 668 - 669 - /* insert right part of split extent into cache */ 670 - endofs = dei.fofs + dei.len - 1; 671 - if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { 672 - set_extent_info(&ei, fofs + 1, 673 - fofs - dei.fofs + dei.blk, endofs - fofs); 674 - en2 = __insert_extent_tree(sbi, et, &ei, NULL); 675 - } 274 + if (f2fs_lookup_extent_cache(inode, index, &ei)) { 275 + dn->data_blkaddr = ei.blk + index - ei.fofs; 276 + return 0; 676 277 } 677 278 678 - update_extent: 679 - /* 3. update extent in extent cache */ 680 - if (blkaddr) { 681 - set_extent_info(&ei, fofs, blkaddr, 1); 682 - en3 = __insert_extent_tree(sbi, et, &ei, &den); 683 - } 684 - 685 - /* 4. 
update in global extent list */ 686 - spin_lock(&sbi->extent_lock); 687 - if (en && !list_empty(&en->list)) 688 - list_del(&en->list); 689 - /* 690 - * en1 and en2 split from en, they will become more and more smaller 691 - * fragments after splitting several times. So if the length is smaller 692 - * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree. 693 - */ 694 - if (en1) 695 - list_add_tail(&en1->list, &sbi->extent_list); 696 - if (en2) 697 - list_add_tail(&en2->list, &sbi->extent_list); 698 - if (en3) { 699 - if (list_empty(&en3->list)) 700 - list_add_tail(&en3->list, &sbi->extent_list); 701 - else 702 - list_move_tail(&en3->list, &sbi->extent_list); 703 - } 704 - if (den && !list_empty(&den->list)) 705 - list_del(&den->list); 706 - spin_unlock(&sbi->extent_lock); 707 - 708 - /* 5. release extent node */ 709 - if (en) 710 - kmem_cache_free(extent_node_slab, en); 711 - if (den) 712 - kmem_cache_free(extent_node_slab, den); 713 - 714 - write_unlock(&et->lock); 715 - atomic_dec(&et->refcount); 716 - } 717 - 718 - void f2fs_preserve_extent_tree(struct inode *inode) 719 - { 720 - struct extent_tree *et; 721 - struct extent_info *ext = &F2FS_I(inode)->ext; 722 - bool sync = false; 723 - 724 - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) 725 - return; 726 - 727 - et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); 728 - if (!et) { 729 - if (ext->len) { 730 - ext->len = 0; 731 - update_inode_page(inode); 732 - } 733 - return; 734 - } 735 - 736 - read_lock(&et->lock); 737 - if (et->count) { 738 - struct extent_node *en; 739 - 740 - if (et->cached_en) { 741 - en = et->cached_en; 742 - } else { 743 - struct rb_node *node = rb_first(&et->root); 744 - 745 - if (!node) 746 - node = rb_last(&et->root); 747 - en = rb_entry(node, struct extent_node, rb_node); 748 - } 749 - 750 - if (__is_extent_same(ext, &en->ei)) 751 - goto out; 752 - 753 - *ext = en->ei; 754 - sync = true; 755 - } else if (ext->len) { 756 - ext->len = 0; 757 - sync = true; 758 - } 759 - 
out: 760 - read_unlock(&et->lock); 761 - atomic_dec(&et->refcount); 762 - 763 - if (sync) 764 - update_inode_page(inode); 765 - } 766 - 767 - void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 768 - { 769 - struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; 770 - struct extent_node *en, *tmp; 771 - unsigned long ino = F2FS_ROOT_INO(sbi); 772 - struct radix_tree_iter iter; 773 - void **slot; 774 - unsigned int found; 775 - unsigned int node_cnt = 0, tree_cnt = 0; 776 - 777 - if (!test_opt(sbi, EXTENT_CACHE)) 778 - return; 779 - 780 - if (available_free_memory(sbi, EXTENT_CACHE)) 781 - return; 782 - 783 - spin_lock(&sbi->extent_lock); 784 - list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { 785 - if (!nr_shrink--) 786 - break; 787 - list_del_init(&en->list); 788 - } 789 - spin_unlock(&sbi->extent_lock); 790 - 791 - down_read(&sbi->extent_tree_lock); 792 - while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, 793 - (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 794 - unsigned i; 795 - 796 - ino = treevec[found - 1]->ino + 1; 797 - for (i = 0; i < found; i++) { 798 - struct extent_tree *et = treevec[i]; 799 - 800 - atomic_inc(&et->refcount); 801 - write_lock(&et->lock); 802 - node_cnt += __free_extent_tree(sbi, et, false); 803 - write_unlock(&et->lock); 804 - atomic_dec(&et->refcount); 805 - } 806 - } 807 - up_read(&sbi->extent_tree_lock); 808 - 809 - down_write(&sbi->extent_tree_lock); 810 - radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter, 811 - F2FS_ROOT_INO(sbi)) { 812 - struct extent_tree *et = (struct extent_tree *)*slot; 813 - 814 - if (!atomic_read(&et->refcount) && !et->count) { 815 - radix_tree_delete(&sbi->extent_tree_root, et->ino); 816 - kmem_cache_free(extent_tree_slab, et); 817 - sbi->total_ext_tree--; 818 - tree_cnt++; 819 - } 820 - } 821 - up_write(&sbi->extent_tree_lock); 822 - 823 - trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); 824 - } 825 - 826 - void f2fs_destroy_extent_tree(struct inode 
*inode) 827 - { 828 - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 829 - struct extent_tree *et; 830 - unsigned int node_cnt = 0; 831 - 832 - if (!test_opt(sbi, EXTENT_CACHE)) 833 - return; 834 - 835 - et = __find_extent_tree(sbi, inode->i_ino); 836 - if (!et) 837 - goto out; 838 - 839 - /* free all extent info belong to this extent tree */ 840 - write_lock(&et->lock); 841 - node_cnt = __free_extent_tree(sbi, et, true); 842 - write_unlock(&et->lock); 843 - 844 - atomic_dec(&et->refcount); 845 - 846 - /* try to find and delete extent tree entry in radix tree */ 847 - down_write(&sbi->extent_tree_lock); 848 - et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); 849 - if (!et) { 850 - up_write(&sbi->extent_tree_lock); 851 - goto out; 852 - } 853 - f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); 854 - radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 855 - kmem_cache_free(extent_tree_slab, et); 856 - sbi->total_ext_tree--; 857 - up_write(&sbi->extent_tree_lock); 858 - out: 859 - trace_f2fs_destroy_extent_tree(inode, node_cnt); 860 - return; 861 - } 862 - 863 - void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) 864 - { 865 - if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) 866 - f2fs_init_extent_tree(inode, i_ext); 867 - 868 - write_lock(&F2FS_I(inode)->ext_lock); 869 - get_extent_info(&F2FS_I(inode)->ext, *i_ext); 870 - write_unlock(&F2FS_I(inode)->ext_lock); 871 - } 872 - 873 - static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, 874 - struct extent_info *ei) 875 - { 876 - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) 877 - return false; 878 - 879 - if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) 880 - return f2fs_lookup_extent_tree(inode, pgofs, ei); 881 - 882 - return lookup_extent_info(inode, pgofs, ei); 883 - } 884 - 885 - void f2fs_update_extent_cache(struct dnode_of_data *dn) 886 - { 887 - struct f2fs_inode_info *fi = F2FS_I(dn->inode); 888 - pgoff_t fofs; 889 - 890 - 
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 891 - 892 - if (is_inode_flag_set(fi, FI_NO_EXTENT)) 893 - return; 894 - 895 - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 896 - dn->ofs_in_node; 897 - 898 - if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) 899 - return f2fs_update_extent_tree(dn->inode, fofs, 900 - dn->data_blkaddr); 901 - 902 - if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) 903 - sync_inode_page(dn); 279 + return f2fs_reserve_block(dn, index); 904 280 } 905 281 906 282 struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) ··· 303 935 304 936 set_new_dnode(&dn, inode, NULL, NULL, 0); 305 937 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 306 - if (err) { 307 - f2fs_put_page(page, 1); 308 - return ERR_PTR(err); 309 - } 938 + if (err) 939 + goto put_err; 310 940 f2fs_put_dnode(&dn); 311 941 312 942 if (unlikely(dn.data_blkaddr == NULL_ADDR)) { 313 - f2fs_put_page(page, 1); 314 - return ERR_PTR(-ENOENT); 943 + err = -ENOENT; 944 + goto put_err; 315 945 } 316 946 got_it: 317 947 if (PageUptodate(page)) { ··· 334 968 fio.page = page; 335 969 err = f2fs_submit_page_bio(&fio); 336 970 if (err) 337 - return ERR_PTR(err); 971 + goto put_err; 338 972 return page; 973 + 974 + put_err: 975 + f2fs_put_page(page, 1); 976 + return ERR_PTR(err); 339 977 } 340 978 341 979 struct page *find_data_page(struct inode *inode, pgoff_t index) ··· 400 1030 * 401 1031 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and 402 1032 * f2fs_unlock_op(). 403 - * Note that, ipage is set only by make_empty_dir. 1033 + * Note that, ipage is set only by make_empty_dir, and if any error occur, 1034 + * ipage should be released by this function. 
404 1035 */ 405 1036 struct page *get_new_data_page(struct inode *inode, 406 1037 struct page *ipage, pgoff_t index, bool new_i_size) ··· 412 1041 int err; 413 1042 repeat: 414 1043 page = grab_cache_page(mapping, index); 415 - if (!page) 1044 + if (!page) { 1045 + /* 1046 + * before exiting, we should make sure ipage will be released 1047 + * if any error occur. 1048 + */ 1049 + f2fs_put_page(ipage, 1); 416 1050 return ERR_PTR(-ENOMEM); 1051 + } 417 1052 418 1053 set_new_dnode(&dn, inode, ipage, NULL, 0); 419 1054 err = f2fs_reserve_block(&dn, index); ··· 484 1107 485 1108 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, 486 1109 &sum, seg); 487 - 488 - /* direct IO doesn't use extent cache to maximize the performance */ 489 1110 set_data_blkaddr(dn); 490 1111 491 1112 /* update i_size */ ··· 491 1116 dn->ofs_in_node; 492 1117 if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) 493 1118 i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); 1119 + 1120 + /* direct IO doesn't use extent cache to maximize the performance */ 1121 + f2fs_drop_largest_extent(dn->inode, fofs); 494 1122 495 1123 return 0; 496 1124 } ··· 561 1183 * c. give the block addresses to blockdev 562 1184 */ 563 1185 static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, 564 - int create, bool fiemap) 1186 + int create, int flag) 565 1187 { 566 1188 unsigned int maxblocks = map->m_len; 567 1189 struct dnode_of_data dn; ··· 595 1217 err = 0; 596 1218 goto unlock_out; 597 1219 } 598 - if (dn.data_blkaddr == NEW_ADDR && !fiemap) 599 - goto put_out; 1220 + if (dn.data_blkaddr == NEW_ADDR) { 1221 + if (flag == F2FS_GET_BLOCK_BMAP) { 1222 + err = -ENOENT; 1223 + goto put_out; 1224 + } else if (flag == F2FS_GET_BLOCK_READ || 1225 + flag == F2FS_GET_BLOCK_DIO) { 1226 + goto put_out; 1227 + } 1228 + /* 1229 + * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP), 1230 + * mark it as mapped and unwritten block. 
1231 + */ 1232 + } 600 1233 601 1234 if (dn.data_blkaddr != NULL_ADDR) { 602 1235 map->m_flags = F2FS_MAP_MAPPED; ··· 622 1233 map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED; 623 1234 map->m_pblk = dn.data_blkaddr; 624 1235 } else { 1236 + if (flag == F2FS_GET_BLOCK_BMAP) 1237 + err = -ENOENT; 625 1238 goto put_out; 626 1239 } 627 1240 ··· 646 1255 err = 0; 647 1256 goto unlock_out; 648 1257 } 649 - if (dn.data_blkaddr == NEW_ADDR && !fiemap) 1258 + 1259 + if (dn.data_blkaddr == NEW_ADDR && 1260 + flag != F2FS_GET_BLOCK_FIEMAP) 650 1261 goto put_out; 651 1262 652 1263 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); ··· 690 1297 } 691 1298 692 1299 static int __get_data_block(struct inode *inode, sector_t iblock, 693 - struct buffer_head *bh, int create, bool fiemap) 1300 + struct buffer_head *bh, int create, int flag) 694 1301 { 695 1302 struct f2fs_map_blocks map; 696 1303 int ret; ··· 698 1305 map.m_lblk = iblock; 699 1306 map.m_len = bh->b_size >> inode->i_blkbits; 700 1307 701 - ret = f2fs_map_blocks(inode, &map, create, fiemap); 1308 + ret = f2fs_map_blocks(inode, &map, create, flag); 702 1309 if (!ret) { 703 1310 map_bh(bh, inode->i_sb, map.m_pblk); 704 1311 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; ··· 708 1315 } 709 1316 710 1317 static int get_data_block(struct inode *inode, sector_t iblock, 711 - struct buffer_head *bh_result, int create) 1318 + struct buffer_head *bh_result, int create, int flag) 712 1319 { 713 - return __get_data_block(inode, iblock, bh_result, create, false); 1320 + return __get_data_block(inode, iblock, bh_result, create, flag); 714 1321 } 715 1322 716 - static int get_data_block_fiemap(struct inode *inode, sector_t iblock, 1323 + static int get_data_block_dio(struct inode *inode, sector_t iblock, 717 1324 struct buffer_head *bh_result, int create) 718 1325 { 719 - return __get_data_block(inode, iblock, bh_result, create, true); 1326 + return __get_data_block(inode, iblock, bh_result, create, 1327 + 
F2FS_GET_BLOCK_DIO); 1328 + } 1329 + 1330 + static int get_data_block_bmap(struct inode *inode, sector_t iblock, 1331 + struct buffer_head *bh_result, int create) 1332 + { 1333 + return __get_data_block(inode, iblock, bh_result, create, 1334 + F2FS_GET_BLOCK_BMAP); 720 1335 } 721 1336 722 1337 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) ··· 768 1367 memset(&map_bh, 0, sizeof(struct buffer_head)); 769 1368 map_bh.b_size = len; 770 1369 771 - ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0); 1370 + ret = get_data_block(inode, start_blk, &map_bh, 0, 1371 + F2FS_GET_BLOCK_FIEMAP); 772 1372 if (ret) 773 1373 goto out; 774 1374 ··· 1172 1770 return ret; 1173 1771 } 1174 1772 1773 + /* 1774 + * This function was copied from write_cche_pages from mm/page-writeback.c. 1775 + * The major change is making write step of cold data page separately from 1776 + * warm/hot data page. 1777 + */ 1778 + static int f2fs_write_cache_pages(struct address_space *mapping, 1779 + struct writeback_control *wbc, writepage_t writepage, 1780 + void *data) 1781 + { 1782 + int ret = 0; 1783 + int done = 0; 1784 + struct pagevec pvec; 1785 + int nr_pages; 1786 + pgoff_t uninitialized_var(writeback_index); 1787 + pgoff_t index; 1788 + pgoff_t end; /* Inclusive */ 1789 + pgoff_t done_index; 1790 + int cycled; 1791 + int range_whole = 0; 1792 + int tag; 1793 + int step = 0; 1794 + 1795 + pagevec_init(&pvec, 0); 1796 + next: 1797 + if (wbc->range_cyclic) { 1798 + writeback_index = mapping->writeback_index; /* prev offset */ 1799 + index = writeback_index; 1800 + if (index == 0) 1801 + cycled = 1; 1802 + else 1803 + cycled = 0; 1804 + end = -1; 1805 + } else { 1806 + index = wbc->range_start >> PAGE_CACHE_SHIFT; 1807 + end = wbc->range_end >> PAGE_CACHE_SHIFT; 1808 + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 1809 + range_whole = 1; 1810 + cycled = 1; /* ignore range_cyclic tests */ 1811 + } 1812 + if (wbc->sync_mode == WB_SYNC_ALL || 
wbc->tagged_writepages) 1813 + tag = PAGECACHE_TAG_TOWRITE; 1814 + else 1815 + tag = PAGECACHE_TAG_DIRTY; 1816 + retry: 1817 + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 1818 + tag_pages_for_writeback(mapping, index, end); 1819 + done_index = index; 1820 + while (!done && (index <= end)) { 1821 + int i; 1822 + 1823 + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, 1824 + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1); 1825 + if (nr_pages == 0) 1826 + break; 1827 + 1828 + for (i = 0; i < nr_pages; i++) { 1829 + struct page *page = pvec.pages[i]; 1830 + 1831 + if (page->index > end) { 1832 + done = 1; 1833 + break; 1834 + } 1835 + 1836 + done_index = page->index; 1837 + 1838 + lock_page(page); 1839 + 1840 + if (unlikely(page->mapping != mapping)) { 1841 + continue_unlock: 1842 + unlock_page(page); 1843 + continue; 1844 + } 1845 + 1846 + if (!PageDirty(page)) { 1847 + /* someone wrote it for us */ 1848 + goto continue_unlock; 1849 + } 1850 + 1851 + if (step == is_cold_data(page)) 1852 + goto continue_unlock; 1853 + 1854 + if (PageWriteback(page)) { 1855 + if (wbc->sync_mode != WB_SYNC_NONE) 1856 + f2fs_wait_on_page_writeback(page, DATA); 1857 + else 1858 + goto continue_unlock; 1859 + } 1860 + 1861 + BUG_ON(PageWriteback(page)); 1862 + if (!clear_page_dirty_for_io(page)) 1863 + goto continue_unlock; 1864 + 1865 + ret = (*writepage)(page, wbc, data); 1866 + if (unlikely(ret)) { 1867 + if (ret == AOP_WRITEPAGE_ACTIVATE) { 1868 + unlock_page(page); 1869 + ret = 0; 1870 + } else { 1871 + done_index = page->index + 1; 1872 + done = 1; 1873 + break; 1874 + } 1875 + } 1876 + 1877 + if (--wbc->nr_to_write <= 0 && 1878 + wbc->sync_mode == WB_SYNC_NONE) { 1879 + done = 1; 1880 + break; 1881 + } 1882 + } 1883 + pagevec_release(&pvec); 1884 + cond_resched(); 1885 + } 1886 + 1887 + if (step < 1) { 1888 + step++; 1889 + goto next; 1890 + } 1891 + 1892 + if (!cycled && !done) { 1893 + cycled = 1; 1894 + index = 0; 1895 + end = writeback_index - 1; 1896 
+ goto retry; 1897 + } 1898 + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 1899 + mapping->writeback_index = done_index; 1900 + 1901 + return ret; 1902 + } 1903 + 1175 1904 static int f2fs_write_data_pages(struct address_space *mapping, 1176 1905 struct writeback_control *wbc) 1177 1906 { ··· 1316 1783 1317 1784 /* deal with chardevs and other special file */ 1318 1785 if (!mapping->a_ops->writepage) 1786 + return 0; 1787 + 1788 + /* skip writing if there is no dirty page in this inode */ 1789 + if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) 1319 1790 return 0; 1320 1791 1321 1792 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && ··· 1337 1800 mutex_lock(&sbi->writepages); 1338 1801 locked = true; 1339 1802 } 1340 - ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1803 + ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1804 + f2fs_submit_merged_bio(sbi, DATA, WRITE); 1341 1805 if (locked) 1342 1806 mutex_unlock(&sbi->writepages); 1343 - 1344 - f2fs_submit_merged_bio(sbi, DATA, WRITE); 1345 1807 1346 1808 remove_dirty_dir_inode(inode); 1347 1809 ··· 1368 1832 { 1369 1833 struct inode *inode = mapping->host; 1370 1834 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1371 - struct page *page, *ipage; 1835 + struct page *page = NULL; 1836 + struct page *ipage; 1372 1837 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 1373 1838 struct dnode_of_data dn; 1374 1839 int err = 0; ··· 1419 1882 if (err) 1420 1883 goto put_fail; 1421 1884 } 1422 - err = f2fs_reserve_block(&dn, index); 1885 + 1886 + err = f2fs_get_block(&dn, index); 1423 1887 if (err) 1424 1888 goto put_fail; 1425 1889 put_next: 1426 1890 f2fs_put_dnode(&dn); 1427 1891 f2fs_unlock_op(sbi); 1428 1892 1429 - if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) 1430 - return 0; 1431 - 1432 1893 f2fs_wait_on_page_writeback(page, DATA); 1894 + 1895 + if (len == PAGE_CACHE_SIZE) 1896 + goto out_update; 1897 + if 
(PageUptodate(page)) 1898 + goto out_clear; 1433 1899 1434 1900 if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { 1435 1901 unsigned start = pos & (PAGE_CACHE_SIZE - 1); ··· 1440 1900 1441 1901 /* Reading beyond i_size is simple: memset to zero */ 1442 1902 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); 1443 - goto out; 1903 + goto out_update; 1444 1904 } 1445 1905 1446 1906 if (dn.data_blkaddr == NEW_ADDR) { ··· 1460 1920 1461 1921 lock_page(page); 1462 1922 if (unlikely(!PageUptodate(page))) { 1463 - f2fs_put_page(page, 1); 1464 1923 err = -EIO; 1465 1924 goto fail; 1466 1925 } ··· 1471 1932 /* avoid symlink page */ 1472 1933 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { 1473 1934 err = f2fs_decrypt_one(inode, page); 1474 - if (err) { 1475 - f2fs_put_page(page, 1); 1935 + if (err) 1476 1936 goto fail; 1477 - } 1478 1937 } 1479 1938 } 1480 - out: 1939 + out_update: 1481 1940 SetPageUptodate(page); 1941 + out_clear: 1482 1942 clear_cold_data(page); 1483 1943 return 0; 1484 1944 ··· 1485 1947 f2fs_put_dnode(&dn); 1486 1948 unlock_fail: 1487 1949 f2fs_unlock_op(sbi); 1488 - f2fs_put_page(page, 1); 1489 1950 fail: 1951 + f2fs_put_page(page, 1); 1490 1952 f2fs_write_failed(mapping, pos + len); 1491 1953 return err; 1492 1954 } ··· 1517 1979 { 1518 1980 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; 1519 1981 1520 - if (iov_iter_rw(iter) == READ) 1521 - return 0; 1522 - 1523 1982 if (offset & blocksize_mask) 1524 1983 return -EINVAL; 1525 1984 ··· 1545 2010 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 1546 2011 return 0; 1547 2012 1548 - if (check_direct_IO(inode, iter, offset)) 1549 - return 0; 2013 + err = check_direct_IO(inode, iter, offset); 2014 + if (err) 2015 + return err; 1550 2016 1551 2017 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1552 2018 1553 2019 if (iov_iter_rw(iter) == WRITE) 1554 2020 __allocate_data_blocks(inode, offset, count); 1555 2021 1556 - err = 
blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); 2022 + err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); 1557 2023 if (err < 0 && iov_iter_rw(iter) == WRITE) 1558 2024 f2fs_write_failed(mapping, offset + count); 1559 2025 ··· 1581 2045 else 1582 2046 inode_dec_dirty_pages(inode); 1583 2047 } 2048 + 2049 + /* This is atomic written page, keep Private */ 2050 + if (IS_ATOMIC_WRITTEN_PAGE(page)) 2051 + return; 2052 + 1584 2053 ClearPagePrivate(page); 1585 2054 } 1586 2055 ··· 1593 2052 { 1594 2053 /* If this is dirty page, keep PagePrivate */ 1595 2054 if (PageDirty(page)) 2055 + return 0; 2056 + 2057 + /* This is atomic written page, keep Private */ 2058 + if (IS_ATOMIC_WRITTEN_PAGE(page)) 1596 2059 return 0; 1597 2060 1598 2061 ClearPagePrivate(page); ··· 1613 2068 SetPageUptodate(page); 1614 2069 1615 2070 if (f2fs_is_atomic_file(inode)) { 1616 - register_inmem_page(inode, page); 1617 - return 1; 2071 + if (!IS_ATOMIC_WRITTEN_PAGE(page)) { 2072 + register_inmem_page(inode, page); 2073 + return 1; 2074 + } 2075 + /* 2076 + * Previously, this page has been registered, we just 2077 + * return here. 
2078 + */ 2079 + return 0; 1618 2080 } 1619 2081 1620 2082 if (!PageDirty(page)) { ··· 1642 2090 if (err) 1643 2091 return err; 1644 2092 } 1645 - return generic_block_bmap(mapping, block, get_data_block); 1646 - } 1647 - 1648 - void init_extent_cache_info(struct f2fs_sb_info *sbi) 1649 - { 1650 - INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); 1651 - init_rwsem(&sbi->extent_tree_lock); 1652 - INIT_LIST_HEAD(&sbi->extent_list); 1653 - spin_lock_init(&sbi->extent_lock); 1654 - sbi->total_ext_tree = 0; 1655 - atomic_set(&sbi->total_ext_node, 0); 1656 - } 1657 - 1658 - int __init create_extent_cache(void) 1659 - { 1660 - extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", 1661 - sizeof(struct extent_tree)); 1662 - if (!extent_tree_slab) 1663 - return -ENOMEM; 1664 - extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", 1665 - sizeof(struct extent_node)); 1666 - if (!extent_node_slab) { 1667 - kmem_cache_destroy(extent_tree_slab); 1668 - return -ENOMEM; 1669 - } 1670 - return 0; 1671 - } 1672 - 1673 - void destroy_extent_cache(void) 1674 - { 1675 - kmem_cache_destroy(extent_node_slab); 1676 - kmem_cache_destroy(extent_tree_slab); 2093 + return generic_block_bmap(mapping, block, get_data_block_bmap); 1677 2094 } 1678 2095 1679 2096 const struct address_space_operations f2fs_dblock_aops = {
+24 -6
fs/f2fs/debug.c
··· 33 33 int i; 34 34 35 35 /* validation check of the segment numbers */ 36 - si->hit_ext = sbi->read_hit_ext; 37 - si->total_ext = sbi->total_hit_ext; 36 + si->hit_largest = atomic_read(&sbi->read_hit_largest); 37 + si->hit_cached = atomic_read(&sbi->read_hit_cached); 38 + si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree); 39 + si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; 40 + si->total_ext = atomic_read(&sbi->total_hit_ext); 38 41 si->ext_tree = sbi->total_ext_tree; 39 42 si->ext_node = atomic_read(&sbi->total_ext_node); 40 43 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); ··· 52 49 si->valid_count = valid_user_blocks(sbi); 53 50 si->valid_node_count = valid_node_count(sbi); 54 51 si->valid_inode_count = valid_inode_count(sbi); 52 + si->inline_xattr = atomic_read(&sbi->inline_xattr); 55 53 si->inline_inode = atomic_read(&sbi->inline_inode); 56 54 si->inline_dir = atomic_read(&sbi->inline_dir); 57 55 si->utilization = utilization(sbi); ··· 230 226 seq_printf(s, "Other: %u)\n - Data: %u\n", 231 227 si->valid_node_count - si->valid_inode_count, 232 228 si->valid_count - si->valid_node_count); 229 + seq_printf(s, " - Inline_xattr Inode: %u\n", 230 + si->inline_xattr); 233 231 seq_printf(s, " - Inline_data Inode: %u\n", 234 232 si->inline_inode); 235 233 seq_printf(s, " - Inline_dentry Inode: %u\n", ··· 282 276 si->bg_data_blks); 283 277 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, 284 278 si->bg_node_blks); 285 - seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 286 - si->hit_ext, si->total_ext); 287 - seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); 288 - seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); 279 + seq_puts(s, "\nExtent Cache:\n"); 280 + seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n", 281 + si->hit_largest, si->hit_cached, 282 + si->hit_rbtree); 283 + seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n", 284 + !si->total_ext ? 
0 : 285 + (si->hit_total * 100) / si->total_ext, 286 + si->hit_total, si->total_ext); 287 + seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n", 288 + si->ext_tree, si->ext_node); 289 289 seq_puts(s, "\nBalancing F2FS Async:\n"); 290 290 seq_printf(s, " - inmem: %4d, wb: %4d\n", 291 291 si->inmem_pages, si->wb_pages); ··· 378 366 si->sbi = sbi; 379 367 sbi->stat_info = si; 380 368 369 + atomic_set(&sbi->total_hit_ext, 0); 370 + atomic_set(&sbi->read_hit_rbtree, 0); 371 + atomic_set(&sbi->read_hit_largest, 0); 372 + atomic_set(&sbi->read_hit_cached, 0); 373 + 374 + atomic_set(&sbi->inline_xattr, 0); 381 375 atomic_set(&sbi->inline_inode, 0); 382 376 atomic_set(&sbi->inline_dir, 0); 383 377 atomic_set(&sbi->inplace_count, 0);
+2 -2
fs/f2fs/dir.c
··· 718 718 if (inode) 719 719 f2fs_drop_nlink(dir, inode, NULL); 720 720 721 - if (bit_pos == NR_DENTRY_IN_BLOCK) { 722 - truncate_hole(dir, page->index, page->index + 1); 721 + if (bit_pos == NR_DENTRY_IN_BLOCK && 722 + !truncate_hole(dir, page->index, page->index + 1)) { 723 723 clear_page_dirty_for_io(page); 724 724 ClearPagePrivate(page); 725 725 ClearPageUptodate(page);
+791
fs/f2fs/extent_cache.c
··· 1 + /* 2 + * f2fs extent cache support 3 + * 4 + * Copyright (c) 2015 Motorola Mobility 5 + * Copyright (c) 2015 Samsung Electronics 6 + * Authors: Jaegeuk Kim <jaegeuk@kernel.org> 7 + * Chao Yu <chao2.yu@samsung.com> 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + */ 13 + 14 + #include <linux/fs.h> 15 + #include <linux/f2fs_fs.h> 16 + 17 + #include "f2fs.h" 18 + #include "node.h" 19 + #include <trace/events/f2fs.h> 20 + 21 + static struct kmem_cache *extent_tree_slab; 22 + static struct kmem_cache *extent_node_slab; 23 + 24 + static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, 25 + struct extent_tree *et, struct extent_info *ei, 26 + struct rb_node *parent, struct rb_node **p) 27 + { 28 + struct extent_node *en; 29 + 30 + en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); 31 + if (!en) 32 + return NULL; 33 + 34 + en->ei = *ei; 35 + INIT_LIST_HEAD(&en->list); 36 + 37 + rb_link_node(&en->rb_node, parent, p); 38 + rb_insert_color(&en->rb_node, &et->root); 39 + et->count++; 40 + atomic_inc(&sbi->total_ext_node); 41 + return en; 42 + } 43 + 44 + static void __detach_extent_node(struct f2fs_sb_info *sbi, 45 + struct extent_tree *et, struct extent_node *en) 46 + { 47 + rb_erase(&en->rb_node, &et->root); 48 + et->count--; 49 + atomic_dec(&sbi->total_ext_node); 50 + 51 + if (et->cached_en == en) 52 + et->cached_en = NULL; 53 + } 54 + 55 + static struct extent_tree *__grab_extent_tree(struct inode *inode) 56 + { 57 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 58 + struct extent_tree *et; 59 + nid_t ino = inode->i_ino; 60 + 61 + down_write(&sbi->extent_tree_lock); 62 + et = radix_tree_lookup(&sbi->extent_tree_root, ino); 63 + if (!et) { 64 + et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); 65 + f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); 66 + memset(et, 0, 
sizeof(struct extent_tree)); 67 + et->ino = ino; 68 + et->root = RB_ROOT; 69 + et->cached_en = NULL; 70 + rwlock_init(&et->lock); 71 + atomic_set(&et->refcount, 0); 72 + et->count = 0; 73 + sbi->total_ext_tree++; 74 + } 75 + atomic_inc(&et->refcount); 76 + up_write(&sbi->extent_tree_lock); 77 + 78 + /* never died until evict_inode */ 79 + F2FS_I(inode)->extent_tree = et; 80 + 81 + return et; 82 + } 83 + 84 + static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi, 85 + struct extent_tree *et, unsigned int fofs) 86 + { 87 + struct rb_node *node = et->root.rb_node; 88 + struct extent_node *en = et->cached_en; 89 + 90 + if (en) { 91 + struct extent_info *cei = &en->ei; 92 + 93 + if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) { 94 + stat_inc_cached_node_hit(sbi); 95 + return en; 96 + } 97 + } 98 + 99 + while (node) { 100 + en = rb_entry(node, struct extent_node, rb_node); 101 + 102 + if (fofs < en->ei.fofs) { 103 + node = node->rb_left; 104 + } else if (fofs >= en->ei.fofs + en->ei.len) { 105 + node = node->rb_right; 106 + } else { 107 + stat_inc_rbtree_node_hit(sbi); 108 + return en; 109 + } 110 + } 111 + return NULL; 112 + } 113 + 114 + static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, 115 + struct extent_tree *et, struct extent_info *ei) 116 + { 117 + struct rb_node **p = &et->root.rb_node; 118 + struct extent_node *en; 119 + 120 + en = __attach_extent_node(sbi, et, ei, NULL, p); 121 + if (!en) 122 + return NULL; 123 + 124 + et->largest = en->ei; 125 + et->cached_en = en; 126 + return en; 127 + } 128 + 129 + static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, 130 + struct extent_tree *et, bool free_all) 131 + { 132 + struct rb_node *node, *next; 133 + struct extent_node *en; 134 + unsigned int count = et->count; 135 + 136 + node = rb_first(&et->root); 137 + while (node) { 138 + next = rb_next(node); 139 + en = rb_entry(node, struct extent_node, rb_node); 140 + 141 + if (free_all) { 142 + 
spin_lock(&sbi->extent_lock); 143 + if (!list_empty(&en->list)) 144 + list_del_init(&en->list); 145 + spin_unlock(&sbi->extent_lock); 146 + } 147 + 148 + if (free_all || list_empty(&en->list)) { 149 + __detach_extent_node(sbi, et, en); 150 + kmem_cache_free(extent_node_slab, en); 151 + } 152 + node = next; 153 + } 154 + 155 + return count - et->count; 156 + } 157 + 158 + static void __drop_largest_extent(struct inode *inode, pgoff_t fofs) 159 + { 160 + struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; 161 + 162 + if (largest->fofs <= fofs && largest->fofs + largest->len > fofs) 163 + largest->len = 0; 164 + } 165 + 166 + void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs) 167 + { 168 + if (!f2fs_may_extent_tree(inode)) 169 + return; 170 + 171 + __drop_largest_extent(inode, fofs); 172 + } 173 + 174 + void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) 175 + { 176 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 177 + struct extent_tree *et; 178 + struct extent_node *en; 179 + struct extent_info ei; 180 + 181 + if (!f2fs_may_extent_tree(inode)) 182 + return; 183 + 184 + et = __grab_extent_tree(inode); 185 + 186 + if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) 187 + return; 188 + 189 + set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 190 + le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); 191 + 192 + write_lock(&et->lock); 193 + if (et->count) 194 + goto out; 195 + 196 + en = __init_extent_tree(sbi, et, &ei); 197 + if (en) { 198 + spin_lock(&sbi->extent_lock); 199 + list_add_tail(&en->list, &sbi->extent_list); 200 + spin_unlock(&sbi->extent_lock); 201 + } 202 + out: 203 + write_unlock(&et->lock); 204 + } 205 + 206 + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, 207 + struct extent_info *ei) 208 + { 209 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 210 + struct extent_tree *et = F2FS_I(inode)->extent_tree; 211 + struct extent_node *en; 212 + bool ret = false; 213 + 214 + 
f2fs_bug_on(sbi, !et); 215 + 216 + trace_f2fs_lookup_extent_tree_start(inode, pgofs); 217 + 218 + read_lock(&et->lock); 219 + 220 + if (et->largest.fofs <= pgofs && 221 + et->largest.fofs + et->largest.len > pgofs) { 222 + *ei = et->largest; 223 + ret = true; 224 + stat_inc_largest_node_hit(sbi); 225 + goto out; 226 + } 227 + 228 + en = __lookup_extent_tree(sbi, et, pgofs); 229 + if (en) { 230 + *ei = en->ei; 231 + spin_lock(&sbi->extent_lock); 232 + if (!list_empty(&en->list)) 233 + list_move_tail(&en->list, &sbi->extent_list); 234 + et->cached_en = en; 235 + spin_unlock(&sbi->extent_lock); 236 + ret = true; 237 + } 238 + out: 239 + stat_inc_total_hit(sbi); 240 + read_unlock(&et->lock); 241 + 242 + trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei); 243 + return ret; 244 + } 245 + 246 + 247 + /* 248 + * lookup extent at @fofs, if hit, return the extent 249 + * if not, return NULL and 250 + * @prev_ex: extent before fofs 251 + * @next_ex: extent after fofs 252 + * @insert_p: insert point for new extent at fofs 253 + * in order to simpfy the insertion after. 254 + * tree must stay unchanged between lookup and insertion. 
255 + */ 256 + static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et, 257 + unsigned int fofs, 258 + struct extent_node **prev_ex, 259 + struct extent_node **next_ex, 260 + struct rb_node ***insert_p, 261 + struct rb_node **insert_parent) 262 + { 263 + struct rb_node **pnode = &et->root.rb_node; 264 + struct rb_node *parent = NULL, *tmp_node; 265 + struct extent_node *en = et->cached_en; 266 + 267 + *insert_p = NULL; 268 + *insert_parent = NULL; 269 + *prev_ex = NULL; 270 + *next_ex = NULL; 271 + 272 + if (RB_EMPTY_ROOT(&et->root)) 273 + return NULL; 274 + 275 + if (en) { 276 + struct extent_info *cei = &en->ei; 277 + 278 + if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) 279 + goto lookup_neighbors; 280 + } 281 + 282 + while (*pnode) { 283 + parent = *pnode; 284 + en = rb_entry(*pnode, struct extent_node, rb_node); 285 + 286 + if (fofs < en->ei.fofs) 287 + pnode = &(*pnode)->rb_left; 288 + else if (fofs >= en->ei.fofs + en->ei.len) 289 + pnode = &(*pnode)->rb_right; 290 + else 291 + goto lookup_neighbors; 292 + } 293 + 294 + *insert_p = pnode; 295 + *insert_parent = parent; 296 + 297 + en = rb_entry(parent, struct extent_node, rb_node); 298 + tmp_node = parent; 299 + if (parent && fofs > en->ei.fofs) 300 + tmp_node = rb_next(parent); 301 + *next_ex = tmp_node ? 302 + rb_entry(tmp_node, struct extent_node, rb_node) : NULL; 303 + 304 + tmp_node = parent; 305 + if (parent && fofs < en->ei.fofs) 306 + tmp_node = rb_prev(parent); 307 + *prev_ex = tmp_node ? 308 + rb_entry(tmp_node, struct extent_node, rb_node) : NULL; 309 + return NULL; 310 + 311 + lookup_neighbors: 312 + if (fofs == en->ei.fofs) { 313 + /* lookup prev node for merging backward later */ 314 + tmp_node = rb_prev(&en->rb_node); 315 + *prev_ex = tmp_node ? 
316 + rb_entry(tmp_node, struct extent_node, rb_node) : NULL; 317 + } 318 + if (fofs == en->ei.fofs + en->ei.len - 1) { 319 + /* lookup next node for merging frontward later */ 320 + tmp_node = rb_next(&en->rb_node); 321 + *next_ex = tmp_node ? 322 + rb_entry(tmp_node, struct extent_node, rb_node) : NULL; 323 + } 324 + return en; 325 + } 326 + 327 + static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, 328 + struct extent_tree *et, struct extent_info *ei, 329 + struct extent_node **den, 330 + struct extent_node *prev_ex, 331 + struct extent_node *next_ex) 332 + { 333 + struct extent_node *en = NULL; 334 + 335 + if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { 336 + prev_ex->ei.len += ei->len; 337 + ei = &prev_ex->ei; 338 + en = prev_ex; 339 + } 340 + 341 + if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { 342 + if (en) { 343 + __detach_extent_node(sbi, et, prev_ex); 344 + *den = prev_ex; 345 + } 346 + next_ex->ei.fofs = ei->fofs; 347 + next_ex->ei.blk = ei->blk; 348 + next_ex->ei.len += ei->len; 349 + en = next_ex; 350 + } 351 + 352 + if (en) { 353 + if (en->ei.len > et->largest.len) 354 + et->largest = en->ei; 355 + et->cached_en = en; 356 + } 357 + return en; 358 + } 359 + 360 + static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, 361 + struct extent_tree *et, struct extent_info *ei, 362 + struct rb_node **insert_p, 363 + struct rb_node *insert_parent) 364 + { 365 + struct rb_node **p = &et->root.rb_node; 366 + struct rb_node *parent = NULL; 367 + struct extent_node *en = NULL; 368 + 369 + if (insert_p && insert_parent) { 370 + parent = insert_parent; 371 + p = insert_p; 372 + goto do_insert; 373 + } 374 + 375 + while (*p) { 376 + parent = *p; 377 + en = rb_entry(parent, struct extent_node, rb_node); 378 + 379 + if (ei->fofs < en->ei.fofs) 380 + p = &(*p)->rb_left; 381 + else if (ei->fofs >= en->ei.fofs + en->ei.len) 382 + p = &(*p)->rb_right; 383 + else 384 + f2fs_bug_on(sbi, 1); 385 + } 386 + do_insert: 
387 + en = __attach_extent_node(sbi, et, ei, parent, p); 388 + if (!en) 389 + return NULL; 390 + 391 + if (en->ei.len > et->largest.len) 392 + et->largest = en->ei; 393 + et->cached_en = en; 394 + return en; 395 + } 396 + 397 + unsigned int f2fs_update_extent_tree_range(struct inode *inode, 398 + pgoff_t fofs, block_t blkaddr, unsigned int len) 399 + { 400 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 401 + struct extent_tree *et = F2FS_I(inode)->extent_tree; 402 + struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; 403 + struct extent_node *prev_en = NULL, *next_en = NULL; 404 + struct extent_info ei, dei, prev; 405 + struct rb_node **insert_p = NULL, *insert_parent = NULL; 406 + unsigned int end = fofs + len; 407 + unsigned int pos = (unsigned int)fofs; 408 + 409 + if (!et) 410 + return false; 411 + 412 + write_lock(&et->lock); 413 + 414 + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) { 415 + write_unlock(&et->lock); 416 + return false; 417 + } 418 + 419 + prev = et->largest; 420 + dei.len = 0; 421 + 422 + /* we do not guarantee that the largest extent is cached all the time */ 423 + __drop_largest_extent(inode, fofs); 424 + 425 + /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ 426 + en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en, 427 + &insert_p, &insert_parent); 428 + if (!en) { 429 + if (next_en) { 430 + en = next_en; 431 + f2fs_bug_on(sbi, en->ei.fofs <= pos); 432 + pos = en->ei.fofs; 433 + } else { 434 + /* 435 + * skip searching in the tree since there is no 436 + * larger extent node in the cache. 437 + */ 438 + goto update_extent; 439 + } 440 + } 441 + 442 + /* 2. 
invlidate all extent nodes in range [fofs, fofs + len - 1] */ 443 + while (en) { 444 + struct rb_node *node; 445 + 446 + if (pos >= end) 447 + break; 448 + 449 + dei = en->ei; 450 + en1 = en2 = NULL; 451 + 452 + node = rb_next(&en->rb_node); 453 + 454 + /* 455 + * 2.1 there are four cases when we invalidate blkaddr in extent 456 + * node, |V: valid address, X: will be invalidated| 457 + */ 458 + /* case#1, invalidate right part of extent node |VVVVVXXXXX| */ 459 + if (pos > dei.fofs && end >= dei.fofs + dei.len) { 460 + en->ei.len = pos - dei.fofs; 461 + 462 + if (en->ei.len < F2FS_MIN_EXTENT_LEN) { 463 + __detach_extent_node(sbi, et, en); 464 + insert_p = NULL; 465 + insert_parent = NULL; 466 + goto update; 467 + } 468 + 469 + if (__is_extent_same(&dei, &et->largest)) 470 + et->largest = en->ei; 471 + goto next; 472 + } 473 + 474 + /* case#2, invalidate left part of extent node |XXXXXVVVVV| */ 475 + if (pos <= dei.fofs && end < dei.fofs + dei.len) { 476 + en->ei.fofs = end; 477 + en->ei.blk += end - dei.fofs; 478 + en->ei.len -= end - dei.fofs; 479 + 480 + if (en->ei.len < F2FS_MIN_EXTENT_LEN) { 481 + __detach_extent_node(sbi, et, en); 482 + insert_p = NULL; 483 + insert_parent = NULL; 484 + goto update; 485 + } 486 + 487 + if (__is_extent_same(&dei, &et->largest)) 488 + et->largest = en->ei; 489 + goto next; 490 + } 491 + 492 + __detach_extent_node(sbi, et, en); 493 + 494 + /* 495 + * if we remove node in rb-tree, our parent node pointer may 496 + * point the wrong place, discard them. 
497 + */ 498 + insert_p = NULL; 499 + insert_parent = NULL; 500 + 501 + /* case#3, invalidate entire extent node |XXXXXXXXXX| */ 502 + if (pos <= dei.fofs && end >= dei.fofs + dei.len) { 503 + if (__is_extent_same(&dei, &et->largest)) 504 + et->largest.len = 0; 505 + goto update; 506 + } 507 + 508 + /* 509 + * case#4, invalidate data in the middle of extent node 510 + * |VVVXXXXVVV| 511 + */ 512 + if (dei.len > F2FS_MIN_EXTENT_LEN) { 513 + unsigned int endofs; 514 + 515 + /* insert left part of split extent into cache */ 516 + if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { 517 + set_extent_info(&ei, dei.fofs, dei.blk, 518 + pos - dei.fofs); 519 + en1 = __insert_extent_tree(sbi, et, &ei, 520 + NULL, NULL); 521 + } 522 + 523 + /* insert right part of split extent into cache */ 524 + endofs = dei.fofs + dei.len; 525 + if (endofs - end >= F2FS_MIN_EXTENT_LEN) { 526 + set_extent_info(&ei, end, 527 + end - dei.fofs + dei.blk, 528 + endofs - end); 529 + en2 = __insert_extent_tree(sbi, et, &ei, 530 + NULL, NULL); 531 + } 532 + } 533 + update: 534 + /* 2.2 update in global extent list */ 535 + spin_lock(&sbi->extent_lock); 536 + if (en && !list_empty(&en->list)) 537 + list_del(&en->list); 538 + if (en1) 539 + list_add_tail(&en1->list, &sbi->extent_list); 540 + if (en2) 541 + list_add_tail(&en2->list, &sbi->extent_list); 542 + spin_unlock(&sbi->extent_lock); 543 + 544 + /* 2.3 release extent node */ 545 + if (en) 546 + kmem_cache_free(extent_node_slab, en); 547 + next: 548 + en = node ? rb_entry(node, struct extent_node, rb_node) : NULL; 549 + next_en = en; 550 + if (en) 551 + pos = en->ei.fofs; 552 + } 553 + 554 + update_extent: 555 + /* 3. 
update extent in extent cache */ 556 + if (blkaddr) { 557 + struct extent_node *den = NULL; 558 + 559 + set_extent_info(&ei, fofs, blkaddr, len); 560 + en3 = __try_merge_extent_node(sbi, et, &ei, &den, 561 + prev_en, next_en); 562 + if (!en3) 563 + en3 = __insert_extent_tree(sbi, et, &ei, 564 + insert_p, insert_parent); 565 + 566 + /* give up extent_cache, if split and small updates happen */ 567 + if (dei.len >= 1 && 568 + prev.len < F2FS_MIN_EXTENT_LEN && 569 + et->largest.len < F2FS_MIN_EXTENT_LEN) { 570 + et->largest.len = 0; 571 + set_inode_flag(F2FS_I(inode), FI_NO_EXTENT); 572 + } 573 + 574 + spin_lock(&sbi->extent_lock); 575 + if (en3) { 576 + if (list_empty(&en3->list)) 577 + list_add_tail(&en3->list, &sbi->extent_list); 578 + else 579 + list_move_tail(&en3->list, &sbi->extent_list); 580 + } 581 + if (den && !list_empty(&den->list)) 582 + list_del(&den->list); 583 + spin_unlock(&sbi->extent_lock); 584 + 585 + if (den) 586 + kmem_cache_free(extent_node_slab, den); 587 + } 588 + 589 + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) 590 + __free_extent_tree(sbi, et, true); 591 + 592 + write_unlock(&et->lock); 593 + 594 + return !__is_extent_same(&prev, &et->largest); 595 + } 596 + 597 + unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 598 + { 599 + struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; 600 + struct extent_node *en, *tmp; 601 + unsigned long ino = F2FS_ROOT_INO(sbi); 602 + struct radix_tree_root *root = &sbi->extent_tree_root; 603 + unsigned int found; 604 + unsigned int node_cnt = 0, tree_cnt = 0; 605 + int remained; 606 + 607 + if (!test_opt(sbi, EXTENT_CACHE)) 608 + return 0; 609 + 610 + if (!down_write_trylock(&sbi->extent_tree_lock)) 611 + goto out; 612 + 613 + /* 1. 
remove unreferenced extent tree */ 614 + while ((found = radix_tree_gang_lookup(root, 615 + (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 616 + unsigned i; 617 + 618 + ino = treevec[found - 1]->ino + 1; 619 + for (i = 0; i < found; i++) { 620 + struct extent_tree *et = treevec[i]; 621 + 622 + if (!atomic_read(&et->refcount)) { 623 + write_lock(&et->lock); 624 + node_cnt += __free_extent_tree(sbi, et, true); 625 + write_unlock(&et->lock); 626 + 627 + radix_tree_delete(root, et->ino); 628 + kmem_cache_free(extent_tree_slab, et); 629 + sbi->total_ext_tree--; 630 + tree_cnt++; 631 + 632 + if (node_cnt + tree_cnt >= nr_shrink) 633 + goto unlock_out; 634 + } 635 + } 636 + } 637 + up_write(&sbi->extent_tree_lock); 638 + 639 + /* 2. remove LRU extent entries */ 640 + if (!down_write_trylock(&sbi->extent_tree_lock)) 641 + goto out; 642 + 643 + remained = nr_shrink - (node_cnt + tree_cnt); 644 + 645 + spin_lock(&sbi->extent_lock); 646 + list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { 647 + if (!remained--) 648 + break; 649 + list_del_init(&en->list); 650 + } 651 + spin_unlock(&sbi->extent_lock); 652 + 653 + while ((found = radix_tree_gang_lookup(root, 654 + (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 655 + unsigned i; 656 + 657 + ino = treevec[found - 1]->ino + 1; 658 + for (i = 0; i < found; i++) { 659 + struct extent_tree *et = treevec[i]; 660 + 661 + write_lock(&et->lock); 662 + node_cnt += __free_extent_tree(sbi, et, false); 663 + write_unlock(&et->lock); 664 + 665 + if (node_cnt + tree_cnt >= nr_shrink) 666 + break; 667 + } 668 + } 669 + unlock_out: 670 + up_write(&sbi->extent_tree_lock); 671 + out: 672 + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); 673 + 674 + return node_cnt + tree_cnt; 675 + } 676 + 677 + unsigned int f2fs_destroy_extent_node(struct inode *inode) 678 + { 679 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 680 + struct extent_tree *et = F2FS_I(inode)->extent_tree; 681 + unsigned int node_cnt = 0; 682 + 683 + if (!et) 684 + 
return 0; 685 + 686 + write_lock(&et->lock); 687 + node_cnt = __free_extent_tree(sbi, et, true); 688 + write_unlock(&et->lock); 689 + 690 + return node_cnt; 691 + } 692 + 693 + void f2fs_destroy_extent_tree(struct inode *inode) 694 + { 695 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 696 + struct extent_tree *et = F2FS_I(inode)->extent_tree; 697 + unsigned int node_cnt = 0; 698 + 699 + if (!et) 700 + return; 701 + 702 + if (inode->i_nlink && !is_bad_inode(inode) && et->count) { 703 + atomic_dec(&et->refcount); 704 + return; 705 + } 706 + 707 + /* free all extent info belong to this extent tree */ 708 + node_cnt = f2fs_destroy_extent_node(inode); 709 + 710 + /* delete extent tree entry in radix tree */ 711 + down_write(&sbi->extent_tree_lock); 712 + atomic_dec(&et->refcount); 713 + f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); 714 + radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 715 + kmem_cache_free(extent_tree_slab, et); 716 + sbi->total_ext_tree--; 717 + up_write(&sbi->extent_tree_lock); 718 + 719 + F2FS_I(inode)->extent_tree = NULL; 720 + 721 + trace_f2fs_destroy_extent_tree(inode, node_cnt); 722 + } 723 + 724 + bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, 725 + struct extent_info *ei) 726 + { 727 + if (!f2fs_may_extent_tree(inode)) 728 + return false; 729 + 730 + return f2fs_lookup_extent_tree(inode, pgofs, ei); 731 + } 732 + 733 + void f2fs_update_extent_cache(struct dnode_of_data *dn) 734 + { 735 + struct f2fs_inode_info *fi = F2FS_I(dn->inode); 736 + pgoff_t fofs; 737 + 738 + if (!f2fs_may_extent_tree(dn->inode)) 739 + return; 740 + 741 + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 742 + 743 + 744 + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 745 + dn->ofs_in_node; 746 + 747 + if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1)) 748 + sync_inode_page(dn); 749 + } 750 + 751 + void f2fs_update_extent_cache_range(struct dnode_of_data *dn, 752 + pgoff_t fofs, 
block_t blkaddr, unsigned int len) 753 + 754 + { 755 + if (!f2fs_may_extent_tree(dn->inode)) 756 + return; 757 + 758 + if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len)) 759 + sync_inode_page(dn); 760 + } 761 + 762 + void init_extent_cache_info(struct f2fs_sb_info *sbi) 763 + { 764 + INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); 765 + init_rwsem(&sbi->extent_tree_lock); 766 + INIT_LIST_HEAD(&sbi->extent_list); 767 + spin_lock_init(&sbi->extent_lock); 768 + sbi->total_ext_tree = 0; 769 + atomic_set(&sbi->total_ext_node, 0); 770 + } 771 + 772 + int __init create_extent_cache(void) 773 + { 774 + extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", 775 + sizeof(struct extent_tree)); 776 + if (!extent_tree_slab) 777 + return -ENOMEM; 778 + extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", 779 + sizeof(struct extent_node)); 780 + if (!extent_node_slab) { 781 + kmem_cache_destroy(extent_tree_slab); 782 + return -ENOMEM; 783 + } 784 + return 0; 785 + } 786 + 787 + void destroy_extent_cache(void) 788 + { 789 + kmem_cache_destroy(extent_node_slab); 790 + kmem_cache_destroy(extent_tree_slab); 791 + }
+104 -30
fs/f2fs/f2fs.h
··· 19 19 #include <linux/magic.h> 20 20 #include <linux/kobject.h> 21 21 #include <linux/sched.h> 22 + #include <linux/bio.h> 22 23 23 24 #ifdef CONFIG_F2FS_CHECK_FS 24 25 #define f2fs_bug_on(sbi, condition) BUG_ON(condition) ··· 229 228 #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 230 229 #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) 231 230 #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 231 + #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) 232 232 233 233 #define F2FS_IOC_SET_ENCRYPTION_POLICY \ 234 234 _IOR('f', 19, struct f2fs_encryption_policy) ··· 322 320 */ 323 321 }; 324 322 325 - #define F2FS_LINK_MAX 32000 /* maximum link count per file */ 323 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ 326 324 327 325 #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ 328 326 ··· 351 349 nid_t ino; /* inode number */ 352 350 struct rb_root root; /* root of extent info rb-tree */ 353 351 struct extent_node *cached_en; /* recently accessed extent node */ 352 + struct extent_info largest; /* largested extent info */ 354 353 rwlock_t lock; /* protect extent info rb-tree */ 355 354 atomic_t refcount; /* reference count of rb-tree */ 356 355 unsigned int count; /* # of extent node in rb-tree*/ ··· 374 371 unsigned int m_len; 375 372 unsigned int m_flags; 376 373 }; 374 + 375 + /* for flag in get_data_block */ 376 + #define F2FS_GET_BLOCK_READ 0 377 + #define F2FS_GET_BLOCK_DIO 1 378 + #define F2FS_GET_BLOCK_FIEMAP 2 379 + #define F2FS_GET_BLOCK_BMAP 3 377 380 378 381 /* 379 382 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
··· 429 420 unsigned int clevel; /* maximum level of given file name */ 430 421 nid_t i_xattr_nid; /* node id that contains xattrs */ 431 422 unsigned long long xattr_ver; /* cp version of xattr modification */ 432 - struct extent_info ext; /* in-memory extent cache entry */ 433 - rwlock_t ext_lock; /* rwlock for single extent cache */ 434 423 struct inode_entry *dirty_dir; /* the pointer of dirty dir */ 435 424 436 - struct radix_tree_root inmem_root; /* radix tree for inmem pages */ 437 425 struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 438 426 struct mutex inmem_lock; /* lock for inmemory pages */ 427 + 428 + struct extent_tree *extent_tree; /* cached extent_tree entry */ 439 429 440 430 #ifdef CONFIG_F2FS_FS_ENCRYPTION 441 431 /* Encryption params */ ··· 787 779 unsigned int segment_count[2]; /* # of allocated segments */ 788 780 unsigned int block_count[2]; /* # of allocated blocks */ 789 781 atomic_t inplace_count; /* # of inplace update */ 790 - int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ 782 + atomic_t total_hit_ext; /* # of lookup extent cache */ 783 + atomic_t read_hit_rbtree; /* # of hit rbtree extent node */ 784 + atomic_t read_hit_largest; /* # of hit largest extent node */ 785 + atomic_t read_hit_cached; /* # of hit cached extent node */ 786 + atomic_t inline_xattr; /* # of inline_xattr inodes */ 791 787 atomic_t inline_inode; /* # of inline_data inodes */ 792 788 atomic_t inline_dir; /* # of inline_dentry inodes */ 793 789 int bg_gc; /* background gc calls */ ··· 803 791 /* For sysfs suppport */ 804 792 struct kobject s_kobj; 805 793 struct completion s_kobj_unregister; 794 + 795 + /* For shrinker support */ 796 + struct list_head s_list; 797 + struct mutex umount_mutex; 798 + unsigned int shrinker_run_no; 806 799 }; 807 800 808 801 /* ··· 1056 1039 1057 1040 static inline void inode_dec_dirty_pages(struct inode *inode) 1058 1041 { 1059 - if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) 1042 + if 
(!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 1043 + !S_ISLNK(inode->i_mode)) 1060 1044 return; 1061 1045 1062 1046 atomic_dec(&F2FS_I(inode)->dirty_pages); ··· 1252 1234 gfp_t flags) 1253 1235 { 1254 1236 void *entry; 1255 - retry: 1256 - entry = kmem_cache_alloc(cachep, flags); 1257 - if (!entry) { 1258 - cond_resched(); 1259 - goto retry; 1260 - } 1261 1237 1238 + entry = kmem_cache_alloc(cachep, flags); 1239 + if (!entry) 1240 + entry = kmem_cache_alloc(cachep, flags | __GFP_NOFAIL); 1262 1241 return entry; 1242 + } 1243 + 1244 + static inline struct bio *f2fs_bio_alloc(int npages) 1245 + { 1246 + struct bio *bio; 1247 + 1248 + /* No failure on bio allocation */ 1249 + bio = bio_alloc(GFP_NOIO, npages); 1250 + if (!bio) 1251 + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); 1252 + return bio; 1263 1253 } 1264 1254 1265 1255 static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, ··· 1368 1342 FI_INC_LINK, /* need to increment i_nlink */ 1369 1343 FI_ACL_MODE, /* indicate acl mode */ 1370 1344 FI_NO_ALLOC, /* should not allocate any blocks */ 1345 + FI_FREE_NID, /* free allocated nide */ 1371 1346 FI_UPDATE_DIR, /* should update inode block for consistency */ 1372 1347 FI_DELAY_IPUT, /* used for the recovery */ 1373 1348 FI_NO_EXTENT, /* not to use the extent cache */ ··· 1568 1541 return false; 1569 1542 } 1570 1543 1544 + static inline bool f2fs_may_extent_tree(struct inode *inode) 1545 + { 1546 + mode_t mode = inode->i_mode; 1547 + 1548 + if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || 1549 + is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) 1550 + return false; 1551 + 1552 + return S_ISREG(mode); 1553 + } 1554 + 1571 1555 #define get_inode_mode(i) \ 1572 1556 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? 
\ 1573 1557 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) ··· 1595 1557 int f2fs_sync_file(struct file *, loff_t, loff_t, int); 1596 1558 void truncate_data_blocks(struct dnode_of_data *); 1597 1559 int truncate_blocks(struct inode *, u64, bool); 1598 - void f2fs_truncate(struct inode *); 1560 + int f2fs_truncate(struct inode *, bool); 1599 1561 int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 1600 1562 int f2fs_setattr(struct dentry *, struct iattr *); 1601 1563 int truncate_hole(struct inode *, pgoff_t, pgoff_t); ··· 1687 1649 int truncate_inode_blocks(struct inode *, pgoff_t); 1688 1650 int truncate_xattr_node(struct inode *, struct page *); 1689 1651 int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1690 - void remove_inode_page(struct inode *); 1652 + int remove_inode_page(struct inode *); 1691 1653 struct page *new_inode_page(struct inode *); 1692 1654 struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1693 1655 void ra_node_page(struct f2fs_sb_info *, nid_t); ··· 1698 1660 bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1699 1661 void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1700 1662 void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1663 + int try_to_free_nids(struct f2fs_sb_info *, int); 1701 1664 void recover_inline_xattr(struct inode *, struct page *); 1702 1665 void recover_xattr_data(struct inode *, struct page *, block_t); 1703 1666 int recover_inode_page(struct f2fs_sb_info *, struct page *); ··· 1714 1675 * segment.c 1715 1676 */ 1716 1677 void register_inmem_page(struct inode *, struct page *); 1717 - void commit_inmem_pages(struct inode *, bool); 1678 + int commit_inmem_pages(struct inode *, bool); 1718 1679 void f2fs_balance_fs(struct f2fs_sb_info *); 1719 1680 void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1720 1681 int f2fs_issue_flush(struct f2fs_sb_info *); ··· 1724 1685 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1725 1686 void 
clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); 1726 1687 void release_discard_addrs(struct f2fs_sb_info *); 1727 - void discard_next_dnode(struct f2fs_sb_info *, block_t); 1688 + bool discard_next_dnode(struct f2fs_sb_info *, block_t); 1728 1689 int npages_for_summary_flush(struct f2fs_sb_info *, bool); 1729 1690 void allocate_new_segments(struct f2fs_sb_info *); 1730 1691 int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); ··· 1766 1727 void release_orphan_inode(struct f2fs_sb_info *); 1767 1728 void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1768 1729 void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1769 - void recover_orphan_inodes(struct f2fs_sb_info *); 1730 + int recover_orphan_inodes(struct f2fs_sb_info *); 1770 1731 int get_valid_checkpoint(struct f2fs_sb_info *); 1771 1732 void update_dirty_page(struct inode *, struct page *); 1772 1733 void add_dirty_dir_inode(struct inode *); ··· 1785 1746 void f2fs_submit_page_mbio(struct f2fs_io_info *); 1786 1747 void set_data_blkaddr(struct dnode_of_data *); 1787 1748 int reserve_new_block(struct dnode_of_data *); 1749 + int f2fs_get_block(struct dnode_of_data *, pgoff_t); 1788 1750 int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1789 - void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); 1790 - void f2fs_destroy_extent_tree(struct inode *); 1791 - void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); 1792 - void f2fs_update_extent_cache(struct dnode_of_data *); 1793 - void f2fs_preserve_extent_tree(struct inode *); 1794 1751 struct page *get_read_data_page(struct inode *, pgoff_t, int); 1795 1752 struct page *find_data_page(struct inode *, pgoff_t); 1796 1753 struct page *get_lock_data_page(struct inode *, pgoff_t); 1797 1754 struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1798 1755 int do_write_data_page(struct f2fs_io_info *); 1799 1756 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 
1800 - void init_extent_cache_info(struct f2fs_sb_info *); 1801 - int __init create_extent_cache(void); 1802 - void destroy_extent_cache(void); 1803 1757 void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 1804 1758 int f2fs_release_page(struct page *, gfp_t); 1805 1759 ··· 1820 1788 struct f2fs_sb_info *sbi; 1821 1789 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1822 1790 int main_area_segs, main_area_sections, main_area_zones; 1823 - int hit_ext, total_ext, ext_tree, ext_node; 1791 + int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext; 1792 + int ext_tree, ext_node; 1824 1793 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1825 1794 int nats, dirty_nats, sits, dirty_sits, fnids; 1826 1795 int total_count, utilization; 1827 - int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; 1796 + int bg_gc, inmem_pages, wb_pages; 1797 + int inline_xattr, inline_inode, inline_dir; 1828 1798 unsigned int valid_count, valid_node_count, valid_inode_count; 1829 1799 unsigned int bimodal, avg_vblocks; 1830 1800 int util_free, util_valid, util_invalid; ··· 1857 1823 #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1858 1824 #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1859 1825 #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) 1860 - #define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) 1861 - #define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) 1826 + #define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext)) 1827 + #define stat_inc_rbtree_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_rbtree)) 1828 + #define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest)) 1829 + #define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached)) 1830 + #define stat_inc_inline_xattr(inode) \ 1831 + do { \ 1832 + if (f2fs_has_inline_xattr(inode)) \ 1833 + (atomic_inc(&F2FS_I_SB(inode)->inline_xattr)); \ 1834 + } while (0) 1835 + #define stat_dec_inline_xattr(inode) \ 
1836 + do { \ 1837 + if (f2fs_has_inline_xattr(inode)) \ 1838 + (atomic_dec(&F2FS_I_SB(inode)->inline_xattr)); \ 1839 + } while (0) 1862 1840 #define stat_inc_inline_inode(inode) \ 1863 1841 do { \ 1864 1842 if (f2fs_has_inline_data(inode)) \ ··· 1940 1894 #define stat_inc_dirty_dir(sbi) 1941 1895 #define stat_dec_dirty_dir(sbi) 1942 1896 #define stat_inc_total_hit(sb) 1943 - #define stat_inc_read_hit(sb) 1897 + #define stat_inc_rbtree_node_hit(sb) 1898 + #define stat_inc_largest_node_hit(sbi) 1899 + #define stat_inc_cached_node_hit(sbi) 1900 + #define stat_inc_inline_xattr(inode) 1901 + #define stat_dec_inline_xattr(inode) 1944 1902 #define stat_inc_inline_inode(inode) 1945 1903 #define stat_dec_inline_inode(inode) 1946 1904 #define stat_inc_inline_dir(inode) ··· 1998 1948 bool f2fs_empty_inline_dir(struct inode *); 1999 1949 int f2fs_read_inline_dir(struct file *, struct dir_context *, 2000 1950 struct f2fs_str *); 1951 + 1952 + /* 1953 + * shrinker.c 1954 + */ 1955 + unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *); 1956 + unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *); 1957 + void f2fs_join_shrinker(struct f2fs_sb_info *); 1958 + void f2fs_leave_shrinker(struct f2fs_sb_info *); 1959 + 1960 + /* 1961 + * extent_cache.c 1962 + */ 1963 + unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); 1964 + void f2fs_drop_largest_extent(struct inode *, pgoff_t); 1965 + void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); 1966 + unsigned int f2fs_destroy_extent_node(struct inode *); 1967 + void f2fs_destroy_extent_tree(struct inode *); 1968 + bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); 1969 + void f2fs_update_extent_cache(struct dnode_of_data *); 1970 + void f2fs_update_extent_cache_range(struct dnode_of_data *dn, 1971 + pgoff_t, block_t, unsigned int); 1972 + void init_extent_cache_info(struct f2fs_sb_info *); 1973 + int __init create_extent_cache(void); 1974 + 
void destroy_extent_cache(void); 2001 1975 2002 1976 /* 2003 1977 * crypto support
+142 -43
fs/f2fs/file.c
··· 27 27 #include "segment.h" 28 28 #include "xattr.h" 29 29 #include "acl.h" 30 + #include "gc.h" 30 31 #include "trace.h" 31 32 #include <trace/events/f2fs.h> 32 33 ··· 86 85 mapped: 87 86 /* fill the page */ 88 87 f2fs_wait_on_page_writeback(page, DATA); 88 + /* if gced page is attached, don't write to cold segment */ 89 + clear_cold_data(page); 89 90 out: 90 91 sb_end_pagefault(inode->i_sb); 91 92 return block_page_mkwrite_return(err); ··· 206 203 } 207 204 208 205 /* if the inode is dirty, let's recover all the time */ 209 - if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { 210 - update_inode_page(inode); 206 + if (!datasync) { 207 + f2fs_write_inode(inode, NULL); 211 208 goto go_write; 212 209 } 213 210 ··· 445 442 446 443 int truncate_data_blocks_range(struct dnode_of_data *dn, int count) 447 444 { 448 - int nr_free = 0, ofs = dn->ofs_in_node; 449 445 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 450 446 struct f2fs_node *raw_node; 447 + int nr_free = 0, ofs = dn->ofs_in_node, len = count; 451 448 __le32 *addr; 452 449 453 450 raw_node = F2FS_NODE(dn->node_page); ··· 460 457 461 458 dn->data_blkaddr = NULL_ADDR; 462 459 set_data_blkaddr(dn); 463 - f2fs_update_extent_cache(dn); 464 460 invalidate_blocks(sbi, blkaddr); 465 461 if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) 466 462 clear_inode_flag(F2FS_I(dn->inode), 467 463 FI_FIRST_BLOCK_WRITTEN); 468 464 nr_free++; 469 465 } 466 + 470 467 if (nr_free) { 468 + pgoff_t fofs; 469 + /* 470 + * once we invalidate valid blkaddr in range [ofs, ofs + count], 471 + * we will invalidate all blkaddr in the whole range. 
472 + */ 473 + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), 474 + F2FS_I(dn->inode)) + ofs; 475 + f2fs_update_extent_cache_range(dn, fofs, 0, len); 471 476 dec_valid_block_count(sbi, dn->inode, nr_free); 472 477 set_page_dirty(dn->node_page); 473 478 sync_inode_page(dn); ··· 587 576 return err; 588 577 } 589 578 590 - void f2fs_truncate(struct inode *inode) 579 + int f2fs_truncate(struct inode *inode, bool lock) 591 580 { 581 + int err; 582 + 592 583 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 593 584 S_ISLNK(inode->i_mode))) 594 - return; 585 + return 0; 595 586 596 587 trace_f2fs_truncate(inode); 597 588 598 589 /* we should check inline_data size */ 599 590 if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { 600 - if (f2fs_convert_inline_inode(inode)) 601 - return; 591 + err = f2fs_convert_inline_inode(inode); 592 + if (err) 593 + return err; 602 594 } 603 595 604 - if (!truncate_blocks(inode, i_size_read(inode), true)) { 605 - inode->i_mtime = inode->i_ctime = CURRENT_TIME; 606 - mark_inode_dirty(inode); 607 - } 596 + err = truncate_blocks(inode, i_size_read(inode), lock); 597 + if (err) 598 + return err; 599 + 600 + inode->i_mtime = inode->i_ctime = CURRENT_TIME; 601 + mark_inode_dirty(inode); 602 + return 0; 608 603 } 609 604 610 605 int f2fs_getattr(struct vfsmount *mnt, ··· 670 653 671 654 if (attr->ia_size <= i_size_read(inode)) { 672 655 truncate_setsize(inode, attr->ia_size); 673 - f2fs_truncate(inode); 656 + err = f2fs_truncate(inode, true); 657 + if (err) 658 + return err; 674 659 f2fs_balance_fs(F2FS_I_SB(inode)); 675 660 } else { 676 661 /* ··· 711 692 .fiemap = f2fs_fiemap, 712 693 }; 713 694 714 - static void fill_zero(struct inode *inode, pgoff_t index, 695 + static int fill_zero(struct inode *inode, pgoff_t index, 715 696 loff_t start, loff_t len) 716 697 { 717 698 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 718 699 struct page *page; 719 700 720 701 if (!len) 721 - return; 702 + return 0; 722 703 723 704 
f2fs_balance_fs(sbi); 724 705 ··· 726 707 page = get_new_data_page(inode, NULL, index, false); 727 708 f2fs_unlock_op(sbi); 728 709 729 - if (!IS_ERR(page)) { 730 - f2fs_wait_on_page_writeback(page, DATA); 731 - zero_user(page, start, len); 732 - set_page_dirty(page); 733 - f2fs_put_page(page, 1); 734 - } 710 + if (IS_ERR(page)) 711 + return PTR_ERR(page); 712 + 713 + f2fs_wait_on_page_writeback(page, DATA); 714 + zero_user(page, start, len); 715 + set_page_dirty(page); 716 + f2fs_put_page(page, 1); 717 + return 0; 735 718 } 736 719 737 720 int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) ··· 781 760 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); 782 761 783 762 if (pg_start == pg_end) { 784 - fill_zero(inode, pg_start, off_start, 763 + ret = fill_zero(inode, pg_start, off_start, 785 764 off_end - off_start); 765 + if (ret) 766 + return ret; 786 767 } else { 787 - if (off_start) 788 - fill_zero(inode, pg_start++, off_start, 789 - PAGE_CACHE_SIZE - off_start); 790 - if (off_end) 791 - fill_zero(inode, pg_end, 0, off_end); 768 + if (off_start) { 769 + ret = fill_zero(inode, pg_start++, off_start, 770 + PAGE_CACHE_SIZE - off_start); 771 + if (ret) 772 + return ret; 773 + } 774 + if (off_end) { 775 + ret = fill_zero(inode, pg_end, 0, off_end); 776 + if (ret) 777 + return ret; 778 + } 792 779 793 780 if (pg_start < pg_end) { 794 781 struct address_space *mapping = inode->i_mapping; ··· 826 797 pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; 827 798 int ret = 0; 828 799 829 - f2fs_lock_op(sbi); 830 - 831 800 for (; end < nrpages; start++, end++) { 832 801 block_t new_addr, old_addr; 802 + 803 + f2fs_lock_op(sbi); 833 804 834 805 set_new_dnode(&dn, inode, NULL, NULL, 0); 835 806 ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); ··· 846 817 if (new_addr == NULL_ADDR) { 847 818 set_new_dnode(&dn, inode, NULL, NULL, 0); 848 819 ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); 849 - if (ret && ret != -ENOENT) 820 + if (ret 
&& ret != -ENOENT) { 850 821 goto out; 851 - else if (ret == -ENOENT) 822 + } else if (ret == -ENOENT) { 823 + f2fs_unlock_op(sbi); 852 824 continue; 825 + } 853 826 854 827 if (dn.data_blkaddr == NULL_ADDR) { 855 828 f2fs_put_dnode(&dn); 829 + f2fs_unlock_op(sbi); 856 830 continue; 857 831 } else { 858 832 truncate_data_blocks_range(&dn, 1); ··· 894 862 895 863 f2fs_put_dnode(&dn); 896 864 } 865 + f2fs_unlock_op(sbi); 897 866 } 898 - ret = 0; 867 + return 0; 899 868 out: 900 869 f2fs_unlock_op(sbi); 901 870 return ret; ··· 917 884 /* collapse range should be aligned to block size of f2fs. */ 918 885 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 919 886 return -EINVAL; 887 + 888 + f2fs_balance_fs(F2FS_I_SB(inode)); 889 + 890 + if (f2fs_has_inline_data(inode)) { 891 + ret = f2fs_convert_inline_inode(inode); 892 + if (ret) 893 + return ret; 894 + } 920 895 921 896 pg_start = offset >> PAGE_CACHE_SHIFT; 922 897 pg_end = (offset + len) >> PAGE_CACHE_SHIFT; ··· 987 946 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); 988 947 989 948 if (pg_start == pg_end) { 990 - fill_zero(inode, pg_start, off_start, off_end - off_start); 949 + ret = fill_zero(inode, pg_start, off_start, 950 + off_end - off_start); 951 + if (ret) 952 + return ret; 953 + 991 954 if (offset + len > new_size) 992 955 new_size = offset + len; 993 956 new_size = max_t(loff_t, new_size, offset + len); 994 957 } else { 995 958 if (off_start) { 996 - fill_zero(inode, pg_start++, off_start, 997 - PAGE_CACHE_SIZE - off_start); 959 + ret = fill_zero(inode, pg_start++, off_start, 960 + PAGE_CACHE_SIZE - off_start); 961 + if (ret) 962 + return ret; 963 + 998 964 new_size = max_t(loff_t, new_size, 999 965 pg_start << PAGE_CACHE_SHIFT); 1000 966 } ··· 1043 995 } 1044 996 1045 997 if (off_end) { 1046 - fill_zero(inode, pg_end, 0, off_end); 998 + ret = fill_zero(inode, pg_end, 0, off_end); 999 + if (ret) 1000 + goto out; 1001 + 1047 1002 new_size = max_t(loff_t, new_size, offset + len); 1048 1003 } 1049 
1004 } ··· 1083 1032 return -EINVAL; 1084 1033 1085 1034 f2fs_balance_fs(sbi); 1035 + 1036 + if (f2fs_has_inline_data(inode)) { 1037 + ret = f2fs_convert_inline_inode(inode); 1038 + if (ret) 1039 + return ret; 1040 + } 1086 1041 1087 1042 ret = truncate_blocks(inode, i_size_read(inode), true); 1088 1043 if (ret) ··· 1359 1302 static int f2fs_ioc_start_atomic_write(struct file *filp) 1360 1303 { 1361 1304 struct inode *inode = file_inode(filp); 1305 + int ret; 1362 1306 1363 1307 if (!inode_owner_or_capable(inode)) 1364 1308 return -EACCES; ··· 1369 1311 if (f2fs_is_atomic_file(inode)) 1370 1312 return 0; 1371 1313 1372 - set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1314 + ret = f2fs_convert_inline_inode(inode); 1315 + if (ret) 1316 + return ret; 1373 1317 1374 - return f2fs_convert_inline_inode(inode); 1318 + set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1319 + return 0; 1375 1320 } 1376 1321 1377 1322 static int f2fs_ioc_commit_atomic_write(struct file *filp) ··· 1394 1333 1395 1334 if (f2fs_is_atomic_file(inode)) { 1396 1335 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1397 - commit_inmem_pages(inode, false); 1336 + ret = commit_inmem_pages(inode, false); 1337 + if (ret) 1338 + goto err_out; 1398 1339 } 1399 1340 1400 - ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); 1341 + ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1342 + err_out: 1401 1343 mnt_drop_write_file(filp); 1402 1344 return ret; 1403 1345 } ··· 1408 1344 static int f2fs_ioc_start_volatile_write(struct file *filp) 1409 1345 { 1410 1346 struct inode *inode = file_inode(filp); 1347 + int ret; 1411 1348 1412 1349 if (!inode_owner_or_capable(inode)) 1413 1350 return -EACCES; ··· 1416 1351 if (f2fs_is_volatile_file(inode)) 1417 1352 return 0; 1418 1353 1419 - set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1354 + ret = f2fs_convert_inline_inode(inode); 1355 + if (ret) 1356 + return ret; 1420 1357 1421 - return f2fs_convert_inline_inode(inode); 1358 + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 
1359 + return 0; 1422 1360 } 1423 1361 1424 1362 static int f2fs_ioc_release_volatile_write(struct file *filp) ··· 1457 1389 1458 1390 if (f2fs_is_atomic_file(inode)) { 1459 1391 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1460 - commit_inmem_pages(inode, false); 1392 + commit_inmem_pages(inode, true); 1461 1393 } 1462 1394 1463 1395 if (f2fs_is_volatile_file(inode)) ··· 1612 1544 return 0; 1613 1545 } 1614 1546 1547 + static int f2fs_ioc_gc(struct file *filp, unsigned long arg) 1548 + { 1549 + struct inode *inode = file_inode(filp); 1550 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1551 + __u32 i, count; 1552 + 1553 + if (!capable(CAP_SYS_ADMIN)) 1554 + return -EPERM; 1555 + 1556 + if (get_user(count, (__u32 __user *)arg)) 1557 + return -EFAULT; 1558 + 1559 + if (!count || count > F2FS_BATCH_GC_MAX_NUM) 1560 + return -EINVAL; 1561 + 1562 + for (i = 0; i < count; i++) { 1563 + if (!mutex_trylock(&sbi->gc_mutex)) 1564 + break; 1565 + 1566 + if (f2fs_gc(sbi)) 1567 + break; 1568 + } 1569 + 1570 + if (put_user(i, (__u32 __user *)arg)) 1571 + return -EFAULT; 1572 + 1573 + return 0; 1574 + } 1575 + 1615 1576 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 1616 1577 { 1617 1578 switch (cmd) { ··· 1670 1573 return f2fs_ioc_get_encryption_policy(filp, arg); 1671 1574 case F2FS_IOC_GET_ENCRYPTION_PWSALT: 1672 1575 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 1576 + case F2FS_IOC_GARBAGE_COLLECT: 1577 + return f2fs_ioc_gc(filp, arg); 1673 1578 default: 1674 1579 return -ENOTTY; 1675 1580 }
+45 -36
fs/f2fs/gc.c
··· 391 391 * On validity, copy that node with cold status, otherwise (invalid node) 392 392 * ignore that. 393 393 */ 394 - static void gc_node_segment(struct f2fs_sb_info *sbi, 394 + static int gc_node_segment(struct f2fs_sb_info *sbi, 395 395 struct f2fs_summary *sum, unsigned int segno, int gc_type) 396 396 { 397 397 bool initial = true; 398 398 struct f2fs_summary *entry; 399 + block_t start_addr; 399 400 int off; 401 + 402 + start_addr = START_BLOCK(sbi, segno); 400 403 401 404 next_step: 402 405 entry = sum; ··· 407 404 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 408 405 nid_t nid = le32_to_cpu(entry->nid); 409 406 struct page *node_page; 407 + struct node_info ni; 410 408 411 409 /* stop BG_GC if there is not enough free sections. */ 412 410 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) 413 - return; 411 + return 0; 414 412 415 413 if (check_valid_map(sbi, segno, off) == 0) 416 414 continue; ··· 426 422 427 423 /* block may become invalid during get_node_page */ 428 424 if (check_valid_map(sbi, segno, off) == 0) { 425 + f2fs_put_page(node_page, 1); 426 + continue; 427 + } 428 + 429 + get_node_info(sbi, nid, &ni); 430 + if (ni.blk_addr != start_addr + off) { 429 431 f2fs_put_page(node_page, 1); 430 432 continue; 431 433 } ··· 461 451 }; 462 452 sync_node_pages(sbi, 0, &wbc); 463 453 464 - /* 465 - * In the case of FG_GC, it'd be better to reclaim this victim 466 - * completely. 
467 - */ 468 - if (get_valid_blocks(sbi, segno, 1) != 0) 469 - goto next_step; 454 + /* return 1 only if FG_GC succefully reclaimed one */ 455 + if (get_valid_blocks(sbi, segno, 1) == 0) 456 + return 1; 470 457 } 458 + return 0; 471 459 } 472 460 473 461 /* ··· 495 487 return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); 496 488 } 497 489 498 - static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 490 + static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 499 491 struct node_info *dni, block_t blkaddr, unsigned int *nofs) 500 492 { 501 493 struct page *node_page; ··· 508 500 509 501 node_page = get_node_page(sbi, nid); 510 502 if (IS_ERR(node_page)) 511 - return 0; 503 + return false; 512 504 513 505 get_node_info(sbi, nid, dni); 514 506 515 507 if (sum->version != dni->version) { 516 508 f2fs_put_page(node_page, 1); 517 - return 0; 509 + return false; 518 510 } 519 511 520 512 *nofs = ofs_of_node(node_page); ··· 522 514 f2fs_put_page(node_page, 1); 523 515 524 516 if (source_blkaddr != blkaddr) 525 - return 0; 526 - return 1; 517 + return false; 518 + return true; 527 519 } 528 520 529 521 static void move_encrypted_block(struct inode *inode, block_t bidx) ··· 560 552 fio.page = page; 561 553 fio.blk_addr = dn.data_blkaddr; 562 554 563 - fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); 555 + fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), 556 + fio.blk_addr, 557 + FGP_LOCK|FGP_CREAT, 558 + GFP_NOFS); 564 559 if (!fio.encrypted_page) 565 560 goto put_out; 566 561 ··· 647 636 * If the parent node is not valid or the data block address is different, 648 637 * the victim data block is ignored. 
649 638 */ 650 - static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 639 + static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 651 640 struct gc_inode_list *gc_list, unsigned int segno, int gc_type) 652 641 { 653 642 struct super_block *sb = sbi->sb; ··· 670 659 671 660 /* stop BG_GC if there is not enough free sections. */ 672 661 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) 673 - return; 662 + return 0; 674 663 675 664 if (check_valid_map(sbi, segno, off) == 0) 676 665 continue; ··· 681 670 } 682 671 683 672 /* Get an inode by ino with checking validity */ 684 - if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) 673 + if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) 685 674 continue; 686 675 687 676 if (phase == 1) { ··· 735 724 if (gc_type == FG_GC) { 736 725 f2fs_submit_merged_bio(sbi, DATA, WRITE); 737 726 738 - /* 739 - * In the case of FG_GC, it'd be better to reclaim this victim 740 - * completely. 
741 - */ 742 - if (get_valid_blocks(sbi, segno, 1) != 0) { 743 - phase = 2; 744 - goto next_step; 745 - } 727 + /* return 1 only if FG_GC succefully reclaimed one */ 728 + if (get_valid_blocks(sbi, segno, 1) == 0) 729 + return 1; 746 730 } 731 + return 0; 747 732 } 748 733 749 734 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, ··· 755 748 return ret; 756 749 } 757 750 758 - static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 751 + static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 759 752 struct gc_inode_list *gc_list, int gc_type) 760 753 { 761 754 struct page *sum_page; 762 755 struct f2fs_summary_block *sum; 763 756 struct blk_plug plug; 757 + int nfree = 0; 764 758 765 759 /* read segment summary of victim */ 766 760 sum_page = get_sum_page(sbi, segno); ··· 781 773 782 774 switch (GET_SUM_TYPE((&sum->footer))) { 783 775 case SUM_TYPE_NODE: 784 - gc_node_segment(sbi, sum->entries, segno, gc_type); 776 + nfree = gc_node_segment(sbi, sum->entries, segno, gc_type); 785 777 break; 786 778 case SUM_TYPE_DATA: 787 - gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); 779 + nfree = gc_data_segment(sbi, sum->entries, gc_list, 780 + segno, gc_type); 788 781 break; 789 782 } 790 783 blk_finish_plug(&plug); ··· 794 785 stat_inc_call_count(sbi->stat_info); 795 786 796 787 f2fs_put_page(sum_page, 0); 788 + return nfree; 797 789 } 798 790 799 791 int f2fs_gc(struct f2fs_sb_info *sbi) 800 792 { 801 - unsigned int segno, i; 793 + unsigned int segno = NULL_SEGNO; 794 + unsigned int i; 802 795 int gc_type = BG_GC; 803 796 int nfree = 0; 804 797 int ret = -1; ··· 819 808 820 809 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 821 810 gc_type = FG_GC; 822 - write_checkpoint(sbi, &cpc); 811 + if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi)) 812 + write_checkpoint(sbi, &cpc); 823 813 } 824 814 825 - if (!__get_victim(sbi, &segno, gc_type)) 815 + if (segno == NULL_SEGNO 
&& !__get_victim(sbi, &segno, gc_type)) 826 816 goto stop; 827 817 ret = 0; 828 818 ··· 833 821 META_SSA); 834 822 835 823 for (i = 0; i < sbi->segs_per_sec; i++) 836 - do_garbage_collect(sbi, segno + i, &gc_list, gc_type); 824 + nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type); 837 825 838 - if (gc_type == FG_GC) { 826 + if (gc_type == FG_GC) 839 827 sbi->cur_victim_sec = NULL_SEGNO; 840 - nfree++; 841 - WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); 842 - } 843 828 844 829 if (has_not_enough_free_secs(sbi, nfree)) 845 830 goto gc_more;
+6
fs/f2fs/gc.h
··· 19 19 #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ 20 20 #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 21 21 22 + /* 23 + * with this macro, we can control the max time we do garbage collection, 24 + * when user triggers batch mode gc by ioctl. 25 + */ 26 + #define F2FS_BATCH_GC_MAX_NUM 16 27 + 22 28 /* Search max. number of dirty segments to select a victim segment */ 23 29 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 24 30
+19 -4
fs/f2fs/inline.c
··· 360 360 return 0; 361 361 } 362 362 363 + /* 364 + * NOTE: ipage is grabbed by caller, but if any error occurs, we should 365 + * release ipage in this function. 366 + */ 363 367 static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 364 368 struct f2fs_inline_dentry *inline_dentry) 365 369 { ··· 373 369 int err; 374 370 375 371 page = grab_cache_page(dir->i_mapping, 0); 376 - if (!page) 372 + if (!page) { 373 + f2fs_put_page(ipage, 1); 377 374 return -ENOMEM; 375 + } 378 376 379 377 set_new_dnode(&dn, dir, ipage, NULL, 0); 380 378 err = f2fs_reserve_block(&dn, 0); ··· 384 378 goto out; 385 379 386 380 f2fs_wait_on_page_writeback(page, DATA); 387 - zero_user_segment(page, 0, PAGE_CACHE_SIZE); 381 + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 388 382 389 383 dentry_blk = kmap_atomic(page); 390 384 391 385 /* copy data from inline dentry block to new dentry block */ 392 386 memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, 393 387 INLINE_DENTRY_BITMAP_SIZE); 388 + memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0, 389 + SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE); 390 + /* 391 + * we do not need to zero out remainder part of dentry and filename 392 + * field, since we have used bitmap for marking the usage status of 393 + * them, besides, we can also ignore copying/zeroing reserved space 394 + * of dentry block, because them haven't been used so far. 395 + */ 394 396 memcpy(dentry_blk->dentry, inline_dentry->dentry, 395 397 sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); 396 398 memcpy(dentry_blk->filename, inline_dentry->filename, ··· 448 434 slots, NR_INLINE_DENTRY); 449 435 if (bit_pos >= NR_INLINE_DENTRY) { 450 436 err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); 451 - if (!err) 452 - err = -EAGAIN; 437 + if (err) 438 + return err; 439 + err = -EAGAIN; 453 440 goto out; 454 441 } 455 442
+69 -28
fs/f2fs/inode.c
··· 12 12 #include <linux/f2fs_fs.h> 13 13 #include <linux/buffer_head.h> 14 14 #include <linux/writeback.h> 15 - #include <linux/bitops.h> 16 15 17 16 #include "f2fs.h" 18 17 #include "node.h" ··· 33 34 new_fl |= S_NOATIME; 34 35 if (flags & FS_DIRSYNC_FL) 35 36 new_fl |= S_DIRSYNC; 36 - set_mask_bits(&inode->i_flags, 37 - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); 37 + inode_set_flags(inode, new_fl, 38 + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 38 39 } 39 40 40 41 static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) ··· 138 139 fi->i_pino = le32_to_cpu(ri->i_pino); 139 140 fi->i_dir_level = ri->i_dir_level; 140 141 141 - f2fs_init_extent_cache(inode, &ri->i_ext); 142 + f2fs_init_extent_tree(inode, &ri->i_ext); 142 143 143 144 get_inline_info(fi, ri); 144 145 ··· 154 155 155 156 f2fs_put_page(node_page, 1); 156 157 158 + stat_inc_inline_xattr(inode); 157 159 stat_inc_inline_inode(inode); 158 160 stat_inc_inline_dir(inode); 159 161 ··· 237 237 ri->i_size = cpu_to_le64(i_size_read(inode)); 238 238 ri->i_blocks = cpu_to_le64(inode->i_blocks); 239 239 240 - read_lock(&F2FS_I(inode)->ext_lock); 241 - set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); 242 - read_unlock(&F2FS_I(inode)->ext_lock); 243 - 240 + if (F2FS_I(inode)->extent_tree) 241 + set_raw_extent(&F2FS_I(inode)->extent_tree->largest, 242 + &ri->i_ext); 243 + else 244 + memset(&ri->i_ext, 0, sizeof(ri->i_ext)); 244 245 set_raw_inline(F2FS_I(inode), ri); 245 246 246 247 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); ··· 315 314 void f2fs_evict_inode(struct inode *inode) 316 315 { 317 316 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 318 - nid_t xnid = F2FS_I(inode)->i_xattr_nid; 317 + struct f2fs_inode_info *fi = F2FS_I(inode); 318 + nid_t xnid = fi->i_xattr_nid; 319 + int err = 0; 319 320 320 321 /* some remained atomic pages should discarded */ 321 322 if (f2fs_is_atomic_file(inode)) ··· 333 330 f2fs_bug_on(sbi, get_dirty_pages(inode)); 334 331 
remove_dirty_dir_inode(inode); 335 332 333 + f2fs_destroy_extent_tree(inode); 334 + 336 335 if (inode->i_nlink || is_bad_inode(inode)) 337 336 goto no_delete; 338 337 339 338 sb_start_intwrite(inode->i_sb); 340 - set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); 339 + set_inode_flag(fi, FI_NO_ALLOC); 341 340 i_size_write(inode, 0); 342 341 343 342 if (F2FS_HAS_BLOCKS(inode)) 344 - f2fs_truncate(inode); 343 + err = f2fs_truncate(inode, true); 345 344 346 - f2fs_lock_op(sbi); 347 - remove_inode_page(inode); 348 - f2fs_unlock_op(sbi); 345 + if (!err) { 346 + f2fs_lock_op(sbi); 347 + err = remove_inode_page(inode); 348 + f2fs_unlock_op(sbi); 349 + } 349 350 350 351 sb_end_intwrite(inode->i_sb); 351 352 no_delete: 353 + stat_dec_inline_xattr(inode); 352 354 stat_dec_inline_dir(inode); 353 355 stat_dec_inline_inode(inode); 354 - 355 - /* update extent info in inode */ 356 - if (inode->i_nlink) 357 - f2fs_preserve_extent_tree(inode); 358 - f2fs_destroy_extent_tree(inode); 359 356 360 357 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 361 358 if (xnid) 362 359 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 363 - if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) 360 + if (is_inode_flag_set(fi, FI_APPEND_WRITE)) 364 361 add_dirty_inode(sbi, inode->i_ino, APPEND_INO); 365 - if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) 362 + if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) 366 363 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 364 + if (is_inode_flag_set(fi, FI_FREE_NID)) { 365 + if (err && err != -ENOENT) 366 + alloc_nid_done(sbi, inode->i_ino); 367 + else 368 + alloc_nid_failed(sbi, inode->i_ino); 369 + clear_inode_flag(fi, FI_FREE_NID); 370 + } 371 + 372 + if (err && err != -ENOENT) { 373 + if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) { 374 + /* 375 + * get here because we failed to release resource 376 + * of inode previously, reminder our user to run fsck 377 + * for fixing. 
378 + */ 379 + set_sbi_flag(sbi, SBI_NEED_FSCK); 380 + f2fs_msg(sbi->sb, KERN_WARNING, 381 + "inode (ino:%lu) resource leak, run fsck " 382 + "to fix this issue!", inode->i_ino); 383 + } 384 + } 367 385 out_clear: 368 386 #ifdef CONFIG_F2FS_FS_ENCRYPTION 369 - if (F2FS_I(inode)->i_crypt_info) 370 - f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); 387 + if (fi->i_crypt_info) 388 + f2fs_free_encryption_info(inode, fi->i_crypt_info); 371 389 #endif 372 390 clear_inode(inode); 373 391 } ··· 397 373 void handle_failed_inode(struct inode *inode) 398 374 { 399 375 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 376 + int err = 0; 400 377 401 378 clear_nlink(inode); 402 379 make_bad_inode(inode); ··· 405 380 406 381 i_size_write(inode, 0); 407 382 if (F2FS_HAS_BLOCKS(inode)) 408 - f2fs_truncate(inode); 383 + err = f2fs_truncate(inode, false); 409 384 410 - remove_inode_page(inode); 385 + if (!err) 386 + err = remove_inode_page(inode); 411 387 412 - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 413 - clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); 414 - alloc_nid_failed(sbi, inode->i_ino); 388 + /* 389 + * if we skip truncate_node in remove_inode_page bacause we failed 390 + * before, it's better to find another way to release resource of 391 + * this inode (e.g. valid block count, node block or nid). Here we 392 + * choose to add this inode to orphan list, so that we can call iput 393 + * for releasing in orphan recovery flow. 394 + * 395 + * Note: we should add inode to orphan list before f2fs_unlock_op() 396 + * so we can prevent losing this orphan when encoutering checkpoint 397 + * and following suddenly power-off. 398 + */ 399 + if (err && err != -ENOENT) { 400 + err = acquire_orphan_inode(sbi); 401 + if (!err) 402 + add_orphan_inode(sbi, inode->i_ino); 403 + } 404 + 405 + set_inode_flag(F2FS_I(inode), FI_FREE_NID); 415 406 f2fs_unlock_op(sbi); 416 407 417 408 /* iput will drop the inode object */
+14 -7
fs/f2fs/namei.c
··· 53 53 if (err) { 54 54 err = -EINVAL; 55 55 nid_free = true; 56 - goto out; 56 + goto fail; 57 57 } 58 58 59 59 /* If the directory encrypted, then we should encrypt the inode. */ ··· 65 65 if (f2fs_may_inline_dentry(inode)) 66 66 set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); 67 67 68 + f2fs_init_extent_tree(inode, NULL); 69 + 70 + stat_inc_inline_xattr(inode); 68 71 stat_inc_inline_inode(inode); 69 72 stat_inc_inline_dir(inode); 70 73 ··· 75 72 mark_inode_dirty(inode); 76 73 return inode; 77 74 78 - out: 79 - clear_nlink(inode); 80 - unlock_new_inode(inode); 81 75 fail: 82 76 trace_f2fs_new_inode(inode, err); 83 77 make_bad_inode(inode); 84 - iput(inode); 85 78 if (nid_free) 86 - alloc_nid_failed(sbi, ino); 79 + set_inode_flag(F2FS_I(inode), FI_FREE_NID); 80 + iput(inode); 87 81 return ERR_PTR(err); 88 82 } 89 83 ··· 89 89 size_t slen = strlen(s); 90 90 size_t sublen = strlen(sub); 91 91 92 - if (sublen > slen) 92 + /* 93 + * filename format of multimedia file should be defined as: 94 + * "filename + '.' + extension". 95 + */ 96 + if (slen < sublen + 2) 97 + return 0; 98 + 99 + if (s[slen - sublen - 1] != '.') 93 100 return 0; 94 101 95 102 return !strncasecmp(s + slen - sublen, sub, sublen);
+64 -22
fs/f2fs/node.c
··· 159 159 160 160 head = radix_tree_lookup(&nm_i->nat_set_root, set); 161 161 if (!head) { 162 - head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); 162 + head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); 163 163 164 164 INIT_LIST_HEAD(&head->entry_list); 165 165 INIT_LIST_HEAD(&head->set_list); ··· 246 246 { 247 247 struct nat_entry *new; 248 248 249 - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); 249 + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); 250 250 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); 251 251 memset(new, 0, sizeof(struct nat_entry)); 252 252 nat_set_nid(new, nid); ··· 306 306 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 307 307 unsigned char version = nat_get_version(e); 308 308 nat_set_version(e, inc_node_version(version)); 309 + 310 + /* in order to reuse the nid */ 311 + if (nm_i->next_scan_nid > ni->nid) 312 + nm_i->next_scan_nid = ni->nid; 309 313 } 310 314 311 315 /* change address */ ··· 332 328 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 333 329 { 334 330 struct f2fs_nm_info *nm_i = NM_I(sbi); 331 + int nr = nr_shrink; 335 332 336 - if (available_free_memory(sbi, NAT_ENTRIES)) 333 + if (!down_write_trylock(&nm_i->nat_tree_lock)) 337 334 return 0; 338 335 339 - down_write(&nm_i->nat_tree_lock); 340 336 while (nr_shrink && !list_empty(&nm_i->nat_entries)) { 341 337 struct nat_entry *ne; 342 338 ne = list_first_entry(&nm_i->nat_entries, ··· 345 341 nr_shrink--; 346 342 } 347 343 up_write(&nm_i->nat_tree_lock); 348 - return nr_shrink; 344 + return nr - nr_shrink; 349 345 } 350 346 351 347 /* ··· 902 898 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 903 899 * f2fs_unlock_op(). 
904 900 */ 905 - void remove_inode_page(struct inode *inode) 901 + int remove_inode_page(struct inode *inode) 906 902 { 907 903 struct dnode_of_data dn; 904 + int err; 908 905 909 906 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 910 - if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) 911 - return; 907 + err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); 908 + if (err) 909 + return err; 912 910 913 - if (truncate_xattr_node(inode, dn.inode_page)) { 911 + err = truncate_xattr_node(inode, dn.inode_page); 912 + if (err) { 914 913 f2fs_put_dnode(&dn); 915 - return; 914 + return err; 916 915 } 917 916 918 917 /* remove potential inline_data blocks */ ··· 929 922 930 923 /* will put inode & node pages */ 931 924 truncate_node(&dn); 925 + return 0; 932 926 } 933 927 934 928 struct page *new_inode_page(struct inode *inode) ··· 999 991 /* 1000 992 * Caller should do after getting the following values. 1001 993 * 0: f2fs_put_page(page, 0) 1002 - * LOCKED_PAGE: f2fs_put_page(page, 1) 1003 - * error: nothing 994 + * LOCKED_PAGE or error: f2fs_put_page(page, 1) 1004 995 */ 1005 996 static int read_node_page(struct page *page, int rw) 1006 997 { ··· 1017 1010 1018 1011 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1019 1012 ClearPageUptodate(page); 1020 - f2fs_put_page(page, 1); 1021 1013 return -ENOENT; 1022 1014 } 1023 1015 ··· 1047 1041 return; 1048 1042 1049 1043 err = read_node_page(apage, READA); 1050 - if (err == 0) 1051 - f2fs_put_page(apage, 0); 1052 - else if (err == LOCKED_PAGE) 1053 - f2fs_put_page(apage, 1); 1044 + f2fs_put_page(apage, err ? 
1 : 0); 1054 1045 } 1055 1046 1056 1047 struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) ··· 1060 1057 return ERR_PTR(-ENOMEM); 1061 1058 1062 1059 err = read_node_page(page, READ_SYNC); 1063 - if (err < 0) 1060 + if (err < 0) { 1061 + f2fs_put_page(page, 1); 1064 1062 return ERR_PTR(err); 1065 - else if (err != LOCKED_PAGE) 1063 + } else if (err != LOCKED_PAGE) { 1066 1064 lock_page(page); 1065 + } 1067 1066 1068 1067 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { 1069 1068 ClearPageUptodate(page); ··· 1101 1096 return ERR_PTR(-ENOMEM); 1102 1097 1103 1098 err = read_node_page(page, READ_SYNC); 1104 - if (err < 0) 1099 + if (err < 0) { 1100 + f2fs_put_page(page, 1); 1105 1101 return ERR_PTR(err); 1106 - else if (err == LOCKED_PAGE) 1102 + } else if (err == LOCKED_PAGE) { 1107 1103 goto page_hit; 1104 + } 1108 1105 1109 1106 blk_start_plug(&plug); 1110 1107 ··· 1540 1533 if (unlikely(nid >= nm_i->max_nid)) 1541 1534 nid = 0; 1542 1535 1543 - if (i++ == FREE_NID_PAGES) 1536 + if (++i >= FREE_NID_PAGES) 1544 1537 break; 1545 1538 } 1546 1539 ··· 1577 1570 1578 1571 /* We should not use stale free nids created by build_free_nids */ 1579 1572 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1573 + struct node_info ni; 1574 + 1580 1575 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1581 1576 list_for_each_entry(i, &nm_i->free_nid_list, list) 1582 1577 if (i->state == NID_NEW) ··· 1589 1580 i->state = NID_ALLOC; 1590 1581 nm_i->fcnt--; 1591 1582 spin_unlock(&nm_i->free_nid_list_lock); 1583 + 1584 + /* check nid is allocated already */ 1585 + get_node_info(sbi, *nid, &ni); 1586 + if (ni.blk_addr != NULL_ADDR) { 1587 + alloc_nid_done(sbi, *nid); 1588 + goto retry; 1589 + } 1592 1590 return true; 1593 1591 } 1594 1592 spin_unlock(&nm_i->free_nid_list_lock); ··· 1650 1634 1651 1635 if (need_free) 1652 1636 kmem_cache_free(free_nid_slab, i); 1637 + } 1638 + 1639 + int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) 1640 + { 1641 
+ struct f2fs_nm_info *nm_i = NM_I(sbi); 1642 + struct free_nid *i, *next; 1643 + int nr = nr_shrink; 1644 + 1645 + if (!mutex_trylock(&nm_i->build_lock)) 1646 + return 0; 1647 + 1648 + spin_lock(&nm_i->free_nid_list_lock); 1649 + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { 1650 + if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK) 1651 + break; 1652 + if (i->state == NID_ALLOC) 1653 + continue; 1654 + __del_from_free_nid_list(nm_i, i); 1655 + kmem_cache_free(free_nid_slab, i); 1656 + nm_i->fcnt--; 1657 + nr_shrink--; 1658 + } 1659 + spin_unlock(&nm_i->free_nid_list_lock); 1660 + mutex_unlock(&nm_i->build_lock); 1661 + 1662 + return nr - nr_shrink; 1653 1663 } 1654 1664 1655 1665 void recover_inline_xattr(struct inode *inode, struct page *page)
+35 -8
fs/f2fs/recovery.c
··· 399 399 f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); 400 400 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); 401 401 402 - for (; start < end; start++) { 402 + for (; start < end; start++, dn.ofs_in_node++) { 403 403 block_t src, dest; 404 404 405 405 src = datablock_addr(dn.node_page, dn.ofs_in_node); 406 406 dest = datablock_addr(page, dn.ofs_in_node); 407 407 408 - if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && 409 - is_valid_blkaddr(sbi, dest, META_POR)) { 408 + /* skip recovering if dest is the same as src */ 409 + if (src == dest) 410 + continue; 411 + 412 + /* dest is invalid, just invalidate src block */ 413 + if (dest == NULL_ADDR) { 414 + truncate_data_blocks_range(&dn, 1); 415 + continue; 416 + } 417 + 418 + /* 419 + * dest is reserved block, invalidate src block 420 + * and then reserve one new block in dnode page. 421 + */ 422 + if (dest == NEW_ADDR) { 423 + truncate_data_blocks_range(&dn, 1); 424 + err = reserve_new_block(&dn); 425 + f2fs_bug_on(sbi, err); 426 + continue; 427 + } 428 + 429 + /* dest is valid block, try to recover from src to dest */ 430 + if (is_valid_blkaddr(sbi, dest, META_POR)) { 410 431 411 432 if (src == NULL_ADDR) { 412 433 err = reserve_new_block(&dn); ··· 445 424 ni.version, false); 446 425 recovered++; 447 426 } 448 - dn.ofs_in_node++; 449 427 } 450 428 451 429 if (IS_INODE(dn.node_page)) ··· 545 525 546 526 INIT_LIST_HEAD(&inode_list); 547 527 548 - /* step #1: find fsynced inode numbers */ 549 - set_sbi_flag(sbi, SBI_POR_DOING); 550 - 551 528 /* prevent checkpoint */ 552 529 mutex_lock(&sbi->cp_mutex); 553 530 554 531 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 555 532 533 + /* step #1: find fsynced inode numbers */ 556 534 err = find_fsync_dnodes(sbi, &inode_list); 557 535 if (err) 558 536 goto out; ··· 579 561 580 562 clear_sbi_flag(sbi, SBI_POR_DOING); 581 563 if (err) { 582 - discard_next_dnode(sbi, blkaddr); 564 + bool invalidate = false; 565 + 566 + if (discard_next_dnode(sbi, 
blkaddr)) 567 + invalidate = true; 583 568 584 569 /* Flush all the NAT/SIT pages */ 585 570 while (get_pages(sbi, F2FS_DIRTY_META)) 586 571 sync_meta_pages(sbi, META, LONG_MAX); 572 + 573 + /* invalidate temporary meta page */ 574 + if (invalidate) 575 + invalidate_mapping_pages(META_MAPPING(sbi), 576 + blkaddr, blkaddr); 577 + 587 578 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 588 579 mutex_unlock(&sbi->cp_mutex); 589 580 } else if (need_writecp) {
+49 -29
fs/f2fs/segment.c
··· 197 197 { 198 198 struct f2fs_inode_info *fi = F2FS_I(inode); 199 199 struct inmem_pages *new; 200 - int err; 201 200 202 - SetPagePrivate(page); 203 201 f2fs_trace_pid(page); 202 + 203 + set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); 204 + SetPagePrivate(page); 204 205 205 206 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); 206 207 207 208 /* add atomic page indices to the list */ 208 209 new->page = page; 209 210 INIT_LIST_HEAD(&new->list); 210 - retry: 211 + 211 212 /* increase reference count with clean state */ 212 213 mutex_lock(&fi->inmem_lock); 213 - err = radix_tree_insert(&fi->inmem_root, page->index, new); 214 - if (err == -EEXIST) { 215 - mutex_unlock(&fi->inmem_lock); 216 - kmem_cache_free(inmem_entry_slab, new); 217 - return; 218 - } else if (err) { 219 - mutex_unlock(&fi->inmem_lock); 220 - goto retry; 221 - } 222 214 get_page(page); 223 215 list_add_tail(&new->list, &fi->inmem_pages); 224 216 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); ··· 219 227 trace_f2fs_register_inmem_page(page, INMEM); 220 228 } 221 229 222 - void commit_inmem_pages(struct inode *inode, bool abort) 230 + int commit_inmem_pages(struct inode *inode, bool abort) 223 231 { 224 232 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 225 233 struct f2fs_inode_info *fi = F2FS_I(inode); ··· 231 239 .rw = WRITE_SYNC | REQ_PRIO, 232 240 .encrypted_page = NULL, 233 241 }; 242 + int err = 0; 234 243 235 244 /* 236 245 * The abort is true only when f2fs_evict_inode is called. 
··· 247 254 248 255 mutex_lock(&fi->inmem_lock); 249 256 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 257 + lock_page(cur->page); 250 258 if (!abort) { 251 - lock_page(cur->page); 252 259 if (cur->page->mapping == inode->i_mapping) { 253 260 set_page_dirty(cur->page); 254 261 f2fs_wait_on_page_writeback(cur->page, DATA); ··· 256 263 inode_dec_dirty_pages(inode); 257 264 trace_f2fs_commit_inmem_page(cur->page, INMEM); 258 265 fio.page = cur->page; 259 - do_write_data_page(&fio); 266 + err = do_write_data_page(&fio); 260 267 submit_bio = true; 268 + if (err) { 269 + unlock_page(cur->page); 270 + break; 271 + } 261 272 } 262 - f2fs_put_page(cur->page, 1); 263 273 } else { 264 274 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); 265 - put_page(cur->page); 266 275 } 267 - radix_tree_delete(&fi->inmem_root, cur->page->index); 276 + set_page_private(cur->page, 0); 277 + ClearPagePrivate(cur->page); 278 + f2fs_put_page(cur->page, 1); 279 + 268 280 list_del(&cur->list); 269 281 kmem_cache_free(inmem_entry_slab, cur); 270 282 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); ··· 281 283 if (submit_bio) 282 284 f2fs_submit_merged_bio(sbi, DATA, WRITE); 283 285 } 286 + return err; 284 287 } 285 288 286 289 /* ··· 303 304 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) 304 305 { 305 306 /* try to shrink extent cache when there is no enough memory */ 306 - f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); 307 + if (!available_free_memory(sbi, EXTENT_CACHE)) 308 + f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); 307 309 308 - /* check the # of cached NAT entries and prefree segments */ 309 - if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || 310 + /* check the # of cached NAT entries */ 311 + if (!available_free_memory(sbi, NAT_ENTRIES)) 312 + try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); 313 + 314 + if (!available_free_memory(sbi, FREE_NIDS)) 315 + try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); 316 + 317 + /* checkpoint is the 
only way to shrink partial cached entries */ 318 + if (!available_free_memory(sbi, NAT_ENTRIES) || 310 319 excess_prefree_segs(sbi) || 311 320 !available_free_memory(sbi, INO_ENTRIES)) 312 321 f2fs_sync_fs(sbi->sb, true); ··· 330 323 return 0; 331 324 332 325 if (!llist_empty(&fcc->issue_list)) { 333 - struct bio *bio = bio_alloc(GFP_NOIO, 0); 326 + struct bio *bio; 334 327 struct flush_cmd *cmd, *next; 335 328 int ret; 329 + 330 + bio = f2fs_bio_alloc(0); 336 331 337 332 fcc->dispatch_list = llist_del_all(&fcc->issue_list); 338 333 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); ··· 367 358 if (test_opt(sbi, NOBARRIER)) 368 359 return 0; 369 360 370 - if (!test_opt(sbi, FLUSH_MERGE)) 371 - return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 361 + if (!test_opt(sbi, FLUSH_MERGE)) { 362 + struct bio *bio = f2fs_bio_alloc(0); 363 + int ret; 364 + 365 + bio->bi_bdev = sbi->sb->s_bdev; 366 + ret = submit_bio_wait(WRITE_FLUSH, bio); 367 + bio_put(bio); 368 + return ret; 369 + } 372 370 373 371 init_completion(&cmd.wait); 374 372 ··· 519 503 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 520 504 } 521 505 522 - void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) 506 + bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) 523 507 { 524 508 int err = -ENOTSUPP; 525 509 ··· 529 513 unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 530 514 531 515 if (f2fs_test_bit(offset, se->discard_map)) 532 - return; 516 + return false; 533 517 534 518 err = f2fs_issue_discard(sbi, blkaddr, 1); 535 519 } 536 520 537 - if (err) 521 + if (err) { 538 522 update_meta_page(sbi, NULL, blkaddr); 523 + return true; 524 + } 525 + return false; 539 526 } 540 527 541 528 static void __add_discard_entry(struct f2fs_sb_info *sbi, ··· 1237 1218 mutex_lock(&sit_i->sentry_lock); 1238 1219 1239 1220 /* direct_io'ed data is aligned to the segment for better performance */ 1240 - if (direct_io && curseg->next_blkoff) 1221 + 
if (direct_io && curseg->next_blkoff && 1222 + !has_not_enough_free_secs(sbi, 0)) 1241 1223 __allocate_new_segments(sbi, type); 1242 1224 1243 1225 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); ··· 1753 1733 static struct sit_entry_set *grab_sit_entry_set(void) 1754 1734 { 1755 1735 struct sit_entry_set *ses = 1756 - f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); 1736 + f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS); 1757 1737 1758 1738 ses->entry_cnt = 0; 1759 1739 INIT_LIST_HEAD(&ses->set_list);
+17 -38
fs/f2fs/segment.h
··· 177 177 void (*allocate_segment)(struct f2fs_sb_info *, int, bool); 178 178 }; 179 179 180 + /* 181 + * this value is set in page as a private data which indicate that 182 + * the page is atomically written, and it is in inmem_pages list. 183 + */ 184 + #define ATOMIC_WRITTEN_PAGE 0x0000ffff 185 + 186 + #define IS_ATOMIC_WRITTEN_PAGE(page) \ 187 + (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) 188 + 180 189 struct inmem_pages { 181 190 struct list_head list; 182 191 struct page *page; ··· 564 555 return curseg->next_blkoff; 565 556 } 566 557 567 - #ifdef CONFIG_F2FS_CHECK_FS 568 558 static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 569 559 { 570 - BUG_ON(segno > TOTAL_SEGS(sbi) - 1); 560 + f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); 571 561 } 572 562 573 563 static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 574 564 { 575 - BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); 576 - BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); 565 + f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi) 566 + || blk_addr >= MAX_BLKADDR(sbi)); 577 567 } 578 568 579 569 /* ··· 581 573 static inline void check_block_count(struct f2fs_sb_info *sbi, 582 574 int segno, struct f2fs_sit_entry *raw_sit) 583 575 { 576 + #ifdef CONFIG_F2FS_CHECK_FS 584 577 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? 
true : false; 585 578 int valid_blocks = 0; 586 579 int cur_pos = 0, next_pos; 587 - 588 - /* check segment usage */ 589 - BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); 590 - 591 - /* check boundary of a given segment number */ 592 - BUG_ON(segno > TOTAL_SEGS(sbi) - 1); 593 580 594 581 /* check bitmap with valid block count */ 595 582 do { ··· 601 598 is_valid = !is_valid; 602 599 } while (cur_pos < sbi->blocks_per_seg); 603 600 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); 604 - } 605 - #else 606 - static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 607 - { 608 - if (segno > TOTAL_SEGS(sbi) - 1) 609 - set_sbi_flag(sbi, SBI_NEED_FSCK); 610 - } 611 - 612 - static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 613 - { 614 - if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) 615 - set_sbi_flag(sbi, SBI_NEED_FSCK); 616 - } 617 - 618 - /* 619 - * Summary block is always treated as an invalid block 620 - */ 621 - static inline void check_block_count(struct f2fs_sb_info *sbi, 622 - int segno, struct f2fs_sit_entry *raw_sit) 623 - { 624 - /* check segment usage */ 625 - if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) 626 - set_sbi_flag(sbi, SBI_NEED_FSCK); 627 - 628 - /* check boundary of a given segment number */ 629 - if (segno > TOTAL_SEGS(sbi) - 1) 630 - set_sbi_flag(sbi, SBI_NEED_FSCK); 631 - } 632 601 #endif 602 + /* check segment usage, and check boundary of a given segment number */ 603 + f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg 604 + || segno > TOTAL_SEGS(sbi) - 1); 605 + } 633 606 634 607 static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, 635 608 unsigned int start)
+139
fs/f2fs/shrinker.c
··· 1 + /* 2 + * f2fs shrinker support 3 + * the basic infra was copied from fs/ubifs/shrinker.c 4 + * 5 + * Copyright (c) 2015 Motorola Mobility 6 + * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org> 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License version 2 as 10 + * published by the Free Software Foundation. 11 + */ 12 + #include <linux/fs.h> 13 + #include <linux/f2fs_fs.h> 14 + 15 + #include "f2fs.h" 16 + 17 + static LIST_HEAD(f2fs_list); 18 + static DEFINE_SPINLOCK(f2fs_list_lock); 19 + static unsigned int shrinker_run_no; 20 + 21 + static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) 22 + { 23 + return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; 24 + } 25 + 26 + static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) 27 + { 28 + if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK) 29 + return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK; 30 + return 0; 31 + } 32 + 33 + static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) 34 + { 35 + return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node); 36 + } 37 + 38 + unsigned long f2fs_shrink_count(struct shrinker *shrink, 39 + struct shrink_control *sc) 40 + { 41 + struct f2fs_sb_info *sbi; 42 + struct list_head *p; 43 + unsigned long count = 0; 44 + 45 + spin_lock(&f2fs_list_lock); 46 + p = f2fs_list.next; 47 + while (p != &f2fs_list) { 48 + sbi = list_entry(p, struct f2fs_sb_info, s_list); 49 + 50 + /* stop f2fs_put_super */ 51 + if (!mutex_trylock(&sbi->umount_mutex)) { 52 + p = p->next; 53 + continue; 54 + } 55 + spin_unlock(&f2fs_list_lock); 56 + 57 + /* count extent cache entries */ 58 + count += __count_extent_cache(sbi); 59 + 60 + /* shrink clean nat cache entries */ 61 + count += __count_nat_entries(sbi); 62 + 63 + /* count free nids cache entries */ 64 + count += __count_free_nids(sbi); 65 + 66 + spin_lock(&f2fs_list_lock); 67 + p = p->next; 68 + mutex_unlock(&sbi->umount_mutex); 69 + } 
70 + spin_unlock(&f2fs_list_lock); 71 + return count; 72 + } 73 + 74 + unsigned long f2fs_shrink_scan(struct shrinker *shrink, 75 + struct shrink_control *sc) 76 + { 77 + unsigned long nr = sc->nr_to_scan; 78 + struct f2fs_sb_info *sbi; 79 + struct list_head *p; 80 + unsigned int run_no; 81 + unsigned long freed = 0; 82 + 83 + spin_lock(&f2fs_list_lock); 84 + do { 85 + run_no = ++shrinker_run_no; 86 + } while (run_no == 0); 87 + p = f2fs_list.next; 88 + while (p != &f2fs_list) { 89 + sbi = list_entry(p, struct f2fs_sb_info, s_list); 90 + 91 + if (sbi->shrinker_run_no == run_no) 92 + break; 93 + 94 + /* stop f2fs_put_super */ 95 + if (!mutex_trylock(&sbi->umount_mutex)) { 96 + p = p->next; 97 + continue; 98 + } 99 + spin_unlock(&f2fs_list_lock); 100 + 101 + sbi->shrinker_run_no = run_no; 102 + 103 + /* shrink extent cache entries */ 104 + freed += f2fs_shrink_extent_tree(sbi, nr >> 1); 105 + 106 + /* shrink clean nat cache entries */ 107 + if (freed < nr) 108 + freed += try_to_free_nats(sbi, nr - freed); 109 + 110 + /* shrink free nids cache entries */ 111 + if (freed < nr) 112 + freed += try_to_free_nids(sbi, nr - freed); 113 + 114 + spin_lock(&f2fs_list_lock); 115 + p = p->next; 116 + list_move_tail(&sbi->s_list, &f2fs_list); 117 + mutex_unlock(&sbi->umount_mutex); 118 + if (freed >= nr) 119 + break; 120 + } 121 + spin_unlock(&f2fs_list_lock); 122 + return freed; 123 + } 124 + 125 + void f2fs_join_shrinker(struct f2fs_sb_info *sbi) 126 + { 127 + spin_lock(&f2fs_list_lock); 128 + list_add_tail(&sbi->s_list, &f2fs_list); 129 + spin_unlock(&f2fs_list_lock); 130 + } 131 + 132 + void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) 133 + { 134 + f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); 135 + 136 + spin_lock(&f2fs_list_lock); 137 + list_del(&sbi->s_list); 138 + spin_unlock(&f2fs_list_lock); 139 + }
+57 -8
fs/f2fs/super.c
··· 39 39 static struct kmem_cache *f2fs_inode_cachep; 40 40 static struct kset *f2fs_kset; 41 41 42 + /* f2fs-wide shrinker description */ 43 + static struct shrinker f2fs_shrinker_info = { 44 + .scan_objects = f2fs_shrink_scan, 45 + .count_objects = f2fs_shrink_count, 46 + .seeks = DEFAULT_SEEKS, 47 + }; 48 + 42 49 enum { 43 50 Opt_gc_background, 44 51 Opt_disable_roll_forward, ··· 65 58 Opt_nobarrier, 66 59 Opt_fastboot, 67 60 Opt_extent_cache, 61 + Opt_noextent_cache, 68 62 Opt_noinline_data, 69 63 Opt_err, 70 64 }; ··· 89 81 {Opt_nobarrier, "nobarrier"}, 90 82 {Opt_fastboot, "fastboot"}, 91 83 {Opt_extent_cache, "extent_cache"}, 84 + {Opt_noextent_cache, "noextent_cache"}, 92 85 {Opt_noinline_data, "noinline_data"}, 93 86 {Opt_err, NULL}, 94 87 }; ··· 391 382 case Opt_extent_cache: 392 383 set_opt(sbi, EXTENT_CACHE); 393 384 break; 385 + case Opt_noextent_cache: 386 + clear_opt(sbi, EXTENT_CACHE); 387 + break; 394 388 case Opt_noinline_data: 395 389 clear_opt(sbi, INLINE_DATA); 396 390 break; ··· 422 410 atomic_set(&fi->dirty_pages, 0); 423 411 fi->i_current_depth = 1; 424 412 fi->i_advise = 0; 425 - rwlock_init(&fi->ext_lock); 426 413 init_rwsem(&fi->i_sem); 427 - INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); 428 414 INIT_LIST_HEAD(&fi->inmem_pages); 429 415 mutex_init(&fi->inmem_lock); 430 416 ··· 451 441 */ 452 442 if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { 453 443 if (!inode->i_nlink && !is_bad_inode(inode)) { 444 + /* to avoid evict_inode call simultaneously */ 445 + atomic_inc(&inode->i_count); 454 446 spin_unlock(&inode->i_lock); 455 447 456 448 /* some remained atomic pages should discarded */ 457 449 if (f2fs_is_atomic_file(inode)) 458 450 commit_inmem_pages(inode, true); 459 451 452 + /* should remain fi->extent_tree for writepage */ 453 + f2fs_destroy_extent_node(inode); 454 + 460 455 sb_start_intwrite(inode->i_sb); 461 456 i_size_write(inode, 0); 462 457 463 458 if (F2FS_HAS_BLOCKS(inode)) 464 - f2fs_truncate(inode); 459 + 
f2fs_truncate(inode, true); 465 460 466 461 sb_end_intwrite(inode->i_sb); 467 462 ··· 476 461 F2FS_I(inode)->i_crypt_info); 477 462 #endif 478 463 spin_lock(&inode->i_lock); 464 + atomic_dec(&inode->i_count); 479 465 } 480 466 return 0; 481 467 } ··· 514 498 } 515 499 kobject_del(&sbi->s_kobj); 516 500 517 - f2fs_destroy_stats(sbi); 518 501 stop_gc_thread(sbi); 502 + 503 + /* prevent remaining shrinker jobs */ 504 + mutex_lock(&sbi->umount_mutex); 519 505 520 506 /* 521 507 * We don't need to do checkpoint when superblock is clean. ··· 532 514 write_checkpoint(sbi, &cpc); 533 515 } 534 516 517 + /* write_checkpoint can update stat informaion */ 518 + f2fs_destroy_stats(sbi); 519 + 535 520 /* 536 521 * normally superblock is clean, so we need to release this. 537 522 * In addition, EIO will skip do checkpoint, we need this as well. 538 523 */ 539 524 release_dirty_inode(sbi); 540 525 release_discard_addrs(sbi); 526 + 527 + f2fs_leave_shrinker(sbi); 528 + mutex_unlock(&sbi->umount_mutex); 541 529 542 530 iput(sbi->node_inode); 543 531 iput(sbi->meta_inode); ··· 671 647 seq_puts(seq, ",fastboot"); 672 648 if (test_opt(sbi, EXTENT_CACHE)) 673 649 seq_puts(seq, ",extent_cache"); 650 + else 651 + seq_puts(seq, ",noextent_cache"); 674 652 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 675 653 676 654 return 0; ··· 693 667 struct seg_entry *se = get_seg_entry(sbi, i); 694 668 695 669 if ((i % 10) == 0) 696 - seq_printf(seq, "%-5d", i); 670 + seq_printf(seq, "%-10d", i); 697 671 seq_printf(seq, "%d|%-3u", se->type, 698 672 get_valid_blocks(sbi, i, 1)); 699 673 if ((i % 10) == 9 || i == (total_segs - 1)) ··· 725 699 726 700 set_opt(sbi, BG_GC); 727 701 set_opt(sbi, INLINE_DATA); 702 + set_opt(sbi, EXTENT_CACHE); 728 703 729 704 #ifdef CONFIG_F2FS_FS_XATTR 730 705 set_opt(sbi, XATTR_USER); ··· 997 970 998 971 sbi->dir_level = DEF_DIR_LEVEL; 999 972 clear_sbi_flag(sbi, SBI_NEED_FSCK); 973 + 974 + INIT_LIST_HEAD(&sbi->s_list); 975 + mutex_init(&sbi->umount_mutex); 1000 
976 } 1001 977 1002 978 /* ··· 1165 1135 mutex_init(&sbi->writepages); 1166 1136 mutex_init(&sbi->cp_mutex); 1167 1137 init_rwsem(&sbi->node_write); 1168 - clear_sbi_flag(sbi, SBI_POR_DOING); 1138 + 1139 + /* disallow all the data/node/meta page writes */ 1140 + set_sbi_flag(sbi, SBI_POR_DOING); 1169 1141 spin_lock_init(&sbi->stat_lock); 1170 1142 1171 1143 init_rwsem(&sbi->read_io.io_rwsem); ··· 1244 1212 goto free_nm; 1245 1213 } 1246 1214 1215 + f2fs_join_shrinker(sbi); 1216 + 1247 1217 /* if there are nt orphan nodes free them */ 1248 - recover_orphan_inodes(sbi); 1218 + err = recover_orphan_inodes(sbi); 1219 + if (err) 1220 + goto free_node_inode; 1249 1221 1250 1222 /* read root inode and dentry */ 1251 1223 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); ··· 1311 1275 goto free_kobj; 1312 1276 } 1313 1277 } 1278 + /* recover_fsync_data() cleared this already */ 1279 + clear_sbi_flag(sbi, SBI_POR_DOING); 1314 1280 1315 1281 /* 1316 1282 * If filesystem is not mounted as read-only then ··· 1346 1308 dput(sb->s_root); 1347 1309 sb->s_root = NULL; 1348 1310 free_node_inode: 1311 + mutex_lock(&sbi->umount_mutex); 1312 + f2fs_leave_shrinker(sbi); 1349 1313 iput(sbi->node_inode); 1314 + mutex_unlock(&sbi->umount_mutex); 1350 1315 free_nm: 1351 1316 destroy_node_manager(sbi); 1352 1317 free_sm: ··· 1445 1404 err = f2fs_init_crypto(); 1446 1405 if (err) 1447 1406 goto free_kset; 1448 - err = register_filesystem(&f2fs_fs_type); 1407 + 1408 + err = register_shrinker(&f2fs_shrinker_info); 1449 1409 if (err) 1450 1410 goto free_crypto; 1411 + 1412 + err = register_filesystem(&f2fs_fs_type); 1413 + if (err) 1414 + goto free_shrinker; 1451 1415 f2fs_create_root_stats(); 1452 1416 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); 1453 1417 return 0; 1454 1418 1419 + free_shrinker: 1420 + unregister_shrinker(&f2fs_shrinker_info); 1455 1421 free_crypto: 1456 1422 f2fs_exit_crypto(); 1457 1423 free_kset: ··· 1481 1433 { 1482 1434 remove_proc_entry("fs/f2fs", NULL); 1483 1435 
f2fs_destroy_root_stats(); 1436 + unregister_shrinker(&f2fs_shrinker_info); 1484 1437 unregister_filesystem(&f2fs_fs_type); 1485 1438 f2fs_exit_crypto(); 1486 1439 destroy_extent_cache();
+4 -1
fs/f2fs/xattr.c
··· 499 499 500 500 len = strlen(name); 501 501 502 - if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) 502 + if (len > F2FS_NAME_LEN) 503 503 return -ERANGE; 504 + 505 + if (size > MAX_VALUE_LEN(inode)) 506 + return -E2BIG; 504 507 505 508 base_addr = read_all_xattrs(inode, ipage); 506 509 if (!base_addr)
+13 -3
include/linux/f2fs_fs.h
··· 417 417 418 418 #define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) 419 419 420 - /* the number of dentry in a block */ 421 - #define NR_DENTRY_IN_BLOCK 214 422 - 423 420 /* MAX level for dir lookup */ 424 421 #define MAX_DIR_HASH_DEPTH 63 425 422 426 423 /* MAX buckets in one level of dir */ 427 424 #define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) 428 425 426 + /* 427 + * space utilization of regular dentry and inline dentry 428 + * regular dentry inline dentry 429 + * bitmap 1 * 27 = 27 1 * 23 = 23 430 + * reserved 1 * 3 = 3 1 * 7 = 7 431 + * dentry 11 * 214 = 2354 11 * 182 = 2002 432 + * filename 8 * 214 = 1712 8 * 182 = 1456 433 + * total 4096 3488 434 + * 435 + * Note: there are more reserved space in inline dentry than in regular 436 + * dentry, when converting inline dentry we should handle this carefully. 437 + */ 438 + #define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ 429 439 #define SIZE_OF_DIR_ENTRY 11 /* by byte */ 430 440 #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ 431 441 BITS_PER_BYTE)
+6 -6
include/trace/events/f2fs.h
··· 1099 1099 TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, 1100 1100 1101 1101 TP_PROTO(struct inode *inode, unsigned int pgofs, 1102 - struct extent_node *en), 1102 + struct extent_info *ei), 1103 1103 1104 - TP_ARGS(inode, pgofs, en), 1104 + TP_ARGS(inode, pgofs, ei), 1105 1105 1106 - TP_CONDITION(en), 1106 + TP_CONDITION(ei), 1107 1107 1108 1108 TP_STRUCT__entry( 1109 1109 __field(dev_t, dev) ··· 1118 1118 __entry->dev = inode->i_sb->s_dev; 1119 1119 __entry->ino = inode->i_ino; 1120 1120 __entry->pgofs = pgofs; 1121 - __entry->fofs = en->ei.fofs; 1122 - __entry->blk = en->ei.blk; 1123 - __entry->len = en->ei.len; 1121 + __entry->fofs = ei->fofs; 1122 + __entry->blk = ei->blk; 1123 + __entry->len = ei->len; 1124 1124 ), 1125 1125 1126 1126 TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "