Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-f2fs-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
"Most part of the patches include enhancing the stability and
performance of the in-memory extent cache feature.

In addition, it introduces several new features and configurable
points:
- F2FS_GOING_DOWN_METAFLUSH ioctl to test power failures
- F2FS_IOC_WRITE_CHECKPOINT ioctl to trigger checkpoint by users
- background_gc=sync mount option to do gc synchronously
- periodic checkpoints
- sysfs entry to control readahead blocks for free nids

The following bug fixes have also been merged:
- fix SSA corruption by collapse/insert_range
- correct a couple of gc behaviors
- fix the results of f2fs_map_blocks
- fix error case handling of volatile/atomic writes"

* tag 'for-f2fs-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (54 commits)
f2fs: fix to skip shrinking extent nodes
f2fs: fix error path of ->symlink
f2fs: fix to clear GCed flag for atomic written page
f2fs: don't need to submit bio on error case
f2fs: fix leakage of inmemory atomic pages
f2fs: refactor __find_rev_next_{zero}_bit
f2fs: support fiemap for inline_data
f2fs: flush dirty data for bmap
f2fs: relocate the tracepoint for background_gc
f2fs crypto: fix racing of accessing encrypted page among different competitors
f2fs: export ra_nid_pages to sysfs
f2fs: readahead for free nids building
f2fs: support lower priority asynchronous readahead in ra_meta_pages
f2fs: don't tag REQ_META for temporary non-meta pages
f2fs: add a tracepoint for f2fs_read_data_pages
f2fs: set GFP_NOFS for grab_cache_page
f2fs: fix SSA updates resulting in corruption
Revert "f2fs: do not skip dentry block writes"
f2fs: add F2FS_GOING_DOWN_METAFLUSH to test power-failure
f2fs: merge meta writes as many possible
...

+881 -559
+12
Documentation/ABI/testing/sysfs-fs-f2fs
··· 80 80 Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> 81 81 Description: 82 82 Controls the trimming rate in batch mode. 83 + 84 + What: /sys/fs/f2fs/<disk>/cp_interval 85 + Date: October 2015 86 + Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> 87 + Description: 88 + Controls the checkpoint timing. 89 + 90 + What: /sys/fs/f2fs/<disk>/ra_nid_pages 91 + Date: October 2015 92 + Contact: "Chao Yu" <chao2.yu@samsung.com> 93 + Description: 94 + Controls the count of nid pages to be readaheaded.
+2 -1
Documentation/filesystems/f2fs.txt
··· 102 102 collection, triggered in background when I/O subsystem is 103 103 idle. If background_gc=on, it will turn on the garbage 104 104 collection and if background_gc=off, garbage collection 105 - will be truned off. 105 + will be truned off. If background_gc=sync, it will turn 106 + on synchronous garbage collection running in background. 106 107 Default value for this option is on. So garbage 107 108 collection is on by default. 108 109 disable_roll_forward Disable the roll-forward recovery routine
+42 -7
fs/f2fs/checkpoint.c
··· 47 47 /* 48 48 * We guarantee no failure on the returned page. 49 49 */ 50 - struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) 50 + static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, 51 + bool is_meta) 51 52 { 52 53 struct address_space *mapping = META_MAPPING(sbi); 53 54 struct page *page; ··· 59 58 .blk_addr = index, 60 59 .encrypted_page = NULL, 61 60 }; 61 + 62 + if (unlikely(!is_meta)) 63 + fio.rw &= ~REQ_META; 62 64 repeat: 63 65 page = grab_cache_page(mapping, index); 64 66 if (!page) { ··· 93 89 f2fs_stop_checkpoint(sbi); 94 90 out: 95 91 return page; 92 + } 93 + 94 + struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) 95 + { 96 + return __get_meta_page(sbi, index, true); 97 + } 98 + 99 + /* for POR only */ 100 + struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) 101 + { 102 + return __get_meta_page(sbi, index, false); 96 103 } 97 104 98 105 bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) ··· 140 125 /* 141 126 * Readahead CP/NAT/SIT/SSA pages 142 127 */ 143 - int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type) 128 + int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, 129 + int type, bool sync) 144 130 { 145 131 block_t prev_blk_addr = 0; 146 132 struct page *page; ··· 149 133 struct f2fs_io_info fio = { 150 134 .sbi = sbi, 151 135 .type = META, 152 - .rw = READ_SYNC | REQ_META | REQ_PRIO, 136 + .rw = sync ? 
(READ_SYNC | REQ_META | REQ_PRIO) : READA, 153 137 .encrypted_page = NULL, 154 138 }; 139 + 140 + if (unlikely(type == META_POR)) 141 + fio.rw &= ~REQ_META; 155 142 156 143 for (; nrpages-- > 0; blkno++) { 157 144 ··· 215 196 f2fs_put_page(page, 0); 216 197 217 198 if (readahead) 218 - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); 199 + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true); 219 200 } 220 201 221 202 static int f2fs_write_meta_page(struct page *page, ··· 276 257 long nr_to_write) 277 258 { 278 259 struct address_space *mapping = META_MAPPING(sbi); 279 - pgoff_t index = 0, end = LONG_MAX; 260 + pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX; 280 261 struct pagevec pvec; 281 262 long nwritten = 0; 282 263 struct writeback_control wbc = { ··· 295 276 296 277 for (i = 0; i < nr_pages; i++) { 297 278 struct page *page = pvec.pages[i]; 279 + 280 + if (prev == LONG_MAX) 281 + prev = page->index - 1; 282 + if (nr_to_write != LONG_MAX && page->index != prev + 1) { 283 + pagevec_release(&pvec); 284 + goto stop; 285 + } 298 286 299 287 lock_page(page); 300 288 ··· 323 297 break; 324 298 } 325 299 nwritten++; 300 + prev = page->index; 326 301 if (unlikely(nwritten >= nr_to_write)) 327 302 break; 328 303 } 329 304 pagevec_release(&pvec); 330 305 cond_resched(); 331 306 } 332 - 307 + stop: 333 308 if (nwritten) 334 309 f2fs_submit_merged_bio(sbi, type, WRITE); 335 310 ··· 522 495 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); 523 496 orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); 524 497 525 - ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP); 498 + ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); 526 499 527 500 for (i = 0; i < orphan_blocks; i++) { 528 501 struct page *page = get_meta_page(sbi, start_blk + i); ··· 1027 1000 1028 1001 start_blk = __start_cp_addr(sbi); 1029 1002 1003 + /* need to wait for end_io results */ 1004 + wait_on_all_pages_writeback(sbi); 1005 + if 
(unlikely(f2fs_cp_error(sbi))) 1006 + return; 1007 + 1030 1008 /* write out checkpoint buffer at block 0 */ 1031 1009 update_meta_page(sbi, ckpt, start_blk++); 1032 1010 ··· 1141 1109 if (cpc->reason == CP_RECOVERY) 1142 1110 f2fs_msg(sbi->sb, KERN_NOTICE, 1143 1111 "checkpoint: version = %llx", ckpt_ver); 1112 + 1113 + /* do checkpoint periodically */ 1114 + sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); 1144 1115 out: 1145 1116 mutex_unlock(&sbi->cp_mutex); 1146 1117 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
+107 -69
fs/f2fs/data.c
··· 275 275 return f2fs_reserve_block(dn, index); 276 276 } 277 277 278 - struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) 278 + struct page *get_read_data_page(struct inode *inode, pgoff_t index, 279 + int rw, bool for_write) 279 280 { 280 281 struct address_space *mapping = inode->i_mapping; 281 282 struct dnode_of_data dn; ··· 293 292 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 294 293 return read_mapping_page(mapping, index, NULL); 295 294 296 - page = grab_cache_page(mapping, index); 295 + page = f2fs_grab_cache_page(mapping, index, for_write); 297 296 if (!page) 298 297 return ERR_PTR(-ENOMEM); 299 298 ··· 353 352 return page; 354 353 f2fs_put_page(page, 0); 355 354 356 - page = get_read_data_page(inode, index, READ_SYNC); 355 + page = get_read_data_page(inode, index, READ_SYNC, false); 357 356 if (IS_ERR(page)) 358 357 return page; 359 358 ··· 373 372 * Because, the callers, functions in dir.c and GC, should be able to know 374 373 * whether this page exists or not. 
375 374 */ 376 - struct page *get_lock_data_page(struct inode *inode, pgoff_t index) 375 + struct page *get_lock_data_page(struct inode *inode, pgoff_t index, 376 + bool for_write) 377 377 { 378 378 struct address_space *mapping = inode->i_mapping; 379 379 struct page *page; 380 380 repeat: 381 - page = get_read_data_page(inode, index, READ_SYNC); 381 + page = get_read_data_page(inode, index, READ_SYNC, for_write); 382 382 if (IS_ERR(page)) 383 383 return page; 384 384 ··· 413 411 struct dnode_of_data dn; 414 412 int err; 415 413 repeat: 416 - page = grab_cache_page(mapping, index); 414 + page = f2fs_grab_cache_page(mapping, index, true); 417 415 if (!page) { 418 416 /* 419 417 * before exiting, we should make sure ipage will be released ··· 441 439 } else { 442 440 f2fs_put_page(page, 1); 443 441 444 - page = get_read_data_page(inode, index, READ_SYNC); 442 + page = get_read_data_page(inode, index, READ_SYNC, true); 445 443 if (IS_ERR(page)) 446 444 goto repeat; 447 445 ··· 449 447 lock_page(page); 450 448 } 451 449 got_it: 452 - if (new_i_size && 453 - i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { 454 - i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); 450 + if (new_i_size && i_size_read(inode) < 451 + ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)) { 452 + i_size_write(inode, ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)); 455 453 /* Only the directory inode sets new_i_size */ 456 454 set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); 457 455 } ··· 491 489 /* update i_size */ 492 490 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 493 491 dn->ofs_in_node; 494 - if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) 495 - i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); 492 + if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)) 493 + i_size_write(dn->inode, 494 + ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)); 496 495 497 496 /* direct IO doesn't use extent cache to maximize the performance */ 498 497 
f2fs_drop_largest_extent(dn->inode, fofs); ··· 525 522 526 523 while (dn.ofs_in_node < end_offset && len) { 527 524 block_t blkaddr; 525 + 526 + if (unlikely(f2fs_cp_error(sbi))) 527 + goto sync_out; 528 528 529 529 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); 530 530 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { ··· 571 565 { 572 566 unsigned int maxblocks = map->m_len; 573 567 struct dnode_of_data dn; 568 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 574 569 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 575 570 pgoff_t pgofs, end_offset; 576 571 int err = 0, ofs = 1; ··· 602 595 err = 0; 603 596 goto unlock_out; 604 597 } 605 - if (dn.data_blkaddr == NEW_ADDR) { 606 - if (flag == F2FS_GET_BLOCK_BMAP) { 607 - err = -ENOENT; 608 - goto put_out; 609 - } else if (flag == F2FS_GET_BLOCK_READ || 610 - flag == F2FS_GET_BLOCK_DIO) { 611 - goto put_out; 598 + 599 + if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) { 600 + if (create) { 601 + if (unlikely(f2fs_cp_error(sbi))) { 602 + err = -EIO; 603 + goto put_out; 604 + } 605 + err = __allocate_data_block(&dn); 606 + if (err) 607 + goto put_out; 608 + allocated = true; 609 + map->m_flags = F2FS_MAP_NEW; 610 + } else { 611 + if (flag != F2FS_GET_BLOCK_FIEMAP || 612 + dn.data_blkaddr != NEW_ADDR) { 613 + if (flag == F2FS_GET_BLOCK_BMAP) 614 + err = -ENOENT; 615 + goto put_out; 616 + } 617 + 618 + /* 619 + * preallocated unwritten block should be mapped 620 + * for fiemap. 621 + */ 622 + if (dn.data_blkaddr == NEW_ADDR) 623 + map->m_flags = F2FS_MAP_UNWRITTEN; 612 624 } 613 - /* 614 - * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP), 615 - * mark it as mapped and unwritten block. 
616 - */ 617 625 } 618 626 619 - if (dn.data_blkaddr != NULL_ADDR) { 620 - map->m_flags = F2FS_MAP_MAPPED; 621 - map->m_pblk = dn.data_blkaddr; 622 - if (dn.data_blkaddr == NEW_ADDR) 623 - map->m_flags |= F2FS_MAP_UNWRITTEN; 624 - } else if (create) { 625 - err = __allocate_data_block(&dn); 626 - if (err) 627 - goto put_out; 628 - allocated = true; 629 - map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED; 630 - map->m_pblk = dn.data_blkaddr; 631 - } else { 632 - if (flag == F2FS_GET_BLOCK_BMAP) 633 - err = -ENOENT; 634 - goto put_out; 635 - } 627 + map->m_flags |= F2FS_MAP_MAPPED; 628 + map->m_pblk = dn.data_blkaddr; 629 + map->m_len = 1; 636 630 637 631 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 638 - map->m_len = 1; 639 632 dn.ofs_in_node++; 640 633 pgofs++; 641 634 ··· 654 647 goto unlock_out; 655 648 } 656 649 657 - if (dn.data_blkaddr == NEW_ADDR && 658 - flag != F2FS_GET_BLOCK_FIEMAP) 659 - goto put_out; 660 - 661 650 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 662 651 } 663 652 664 653 if (maxblocks > map->m_len) { 665 654 block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); 666 - if (blkaddr == NULL_ADDR && create) { 667 - err = __allocate_data_block(&dn); 668 - if (err) 669 - goto sync_out; 670 - allocated = true; 671 - map->m_flags |= F2FS_MAP_NEW; 672 - blkaddr = dn.data_blkaddr; 655 + 656 + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { 657 + if (create) { 658 + if (unlikely(f2fs_cp_error(sbi))) { 659 + err = -EIO; 660 + goto sync_out; 661 + } 662 + err = __allocate_data_block(&dn); 663 + if (err) 664 + goto sync_out; 665 + allocated = true; 666 + map->m_flags |= F2FS_MAP_NEW; 667 + blkaddr = dn.data_blkaddr; 668 + } else { 669 + /* 670 + * we only merge preallocated unwritten blocks 671 + * for fiemap. 
672 + */ 673 + if (flag != F2FS_GET_BLOCK_FIEMAP || 674 + blkaddr != NEW_ADDR) 675 + goto sync_out; 676 + } 673 677 } 678 + 674 679 /* Give more consecutive addresses for the readahead */ 675 680 if ((map->m_pblk != NEW_ADDR && 676 681 blkaddr == (map->m_pblk + ofs)) || ··· 770 751 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); 771 752 if (ret) 772 753 return ret; 754 + 755 + if (f2fs_has_inline_data(inode)) { 756 + ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); 757 + if (ret != -EAGAIN) 758 + return ret; 759 + } 773 760 774 761 mutex_lock(&inode->i_mutex); 775 762 ··· 928 903 map.m_lblk = block_in_file; 929 904 map.m_len = last_block - block_in_file; 930 905 931 - if (f2fs_map_blocks(inode, &map, 0, false)) 906 + if (f2fs_map_blocks(inode, &map, 0, 907 + F2FS_GET_BLOCK_READ)) 932 908 goto set_error_page; 933 909 } 934 910 got_it: ··· 962 936 963 937 if (f2fs_encrypted_inode(inode) && 964 938 S_ISREG(inode->i_mode)) { 965 - struct page *cpage; 966 939 967 940 ctx = f2fs_get_crypto_ctx(inode); 968 941 if (IS_ERR(ctx)) 969 942 goto set_error_page; 970 943 971 944 /* wait the page to be moved by cleaning */ 972 - cpage = find_lock_page( 973 - META_MAPPING(F2FS_I_SB(inode)), 974 - block_nr); 975 - if (cpage) { 976 - f2fs_wait_on_page_writeback(cpage, 977 - DATA); 978 - f2fs_put_page(cpage, 1); 979 - } 945 + f2fs_wait_on_encrypted_page_writeback( 946 + F2FS_I_SB(inode), block_nr); 980 947 } 981 948 982 949 bio = bio_alloc(GFP_KERNEL, ··· 1031 1012 struct list_head *pages, unsigned nr_pages) 1032 1013 { 1033 1014 struct inode *inode = file->f_mapping->host; 1015 + struct page *page = list_entry(pages->prev, struct page, lru); 1016 + 1017 + trace_f2fs_readpages(inode, page, nr_pages); 1034 1018 1035 1019 /* If the file has inline data, skip readpages */ 1036 1020 if (f2fs_has_inline_data(inode)) ··· 1063 1041 } 1064 1042 1065 1043 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { 1044 + 1045 + /* wait for GCed encrypted page writeback */ 1046 
+ f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), 1047 + fio->blk_addr); 1048 + 1066 1049 fio->encrypted_page = f2fs_encrypt(inode, fio->page); 1067 1050 if (IS_ERR(fio->encrypted_page)) { 1068 1051 err = PTR_ERR(fio->encrypted_page); ··· 1456 1429 1457 1430 f2fs_wait_on_page_writeback(page, DATA); 1458 1431 1432 + /* wait for GCed encrypted page writeback */ 1433 + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 1434 + f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); 1435 + 1459 1436 if (len == PAGE_CACHE_SIZE) 1460 1437 goto out_update; 1461 1438 if (PageUptodate(page)) ··· 1582 1551 1583 1552 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1584 1553 1585 - if (iov_iter_rw(iter) == WRITE) 1554 + if (iov_iter_rw(iter) == WRITE) { 1586 1555 __allocate_data_blocks(inode, offset, count); 1556 + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 1557 + err = -EIO; 1558 + goto out; 1559 + } 1560 + } 1587 1561 1588 1562 err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); 1563 + out: 1589 1564 if (err < 0 && iov_iter_rw(iter) == WRITE) 1590 1565 f2fs_write_failed(mapping, offset + count); 1591 1566 ··· 1673 1636 { 1674 1637 struct inode *inode = mapping->host; 1675 1638 1676 - /* we don't need to use inline_data strictly */ 1677 - if (f2fs_has_inline_data(inode)) { 1678 - int err = f2fs_convert_inline_inode(inode); 1679 - if (err) 1680 - return err; 1681 - } 1639 + if (f2fs_has_inline_data(inode)) 1640 + return 0; 1641 + 1642 + /* make sure allocating whole blocks */ 1643 + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 1644 + filemap_write_and_wait(mapping); 1645 + 1682 1646 return generic_block_bmap(mapping, block, get_data_block_bmap); 1683 1647 } 1684 1648
+18 -18
fs/f2fs/debug.c
··· 33 33 int i; 34 34 35 35 /* validation check of the segment numbers */ 36 - si->hit_largest = atomic_read(&sbi->read_hit_largest); 37 - si->hit_cached = atomic_read(&sbi->read_hit_cached); 38 - si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree); 36 + si->hit_largest = atomic64_read(&sbi->read_hit_largest); 37 + si->hit_cached = atomic64_read(&sbi->read_hit_cached); 38 + si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); 39 39 si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; 40 - si->total_ext = atomic_read(&sbi->total_hit_ext); 40 + si->total_ext = atomic64_read(&sbi->total_hit_ext); 41 41 si->ext_tree = sbi->total_ext_tree; 42 42 si->ext_node = atomic_read(&sbi->total_ext_node); 43 43 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); ··· 118 118 } 119 119 } 120 120 dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); 121 - si->bimodal = div_u64(bimodal, dist); 121 + si->bimodal = div64_u64(bimodal, dist); 122 122 if (si->dirty_count) 123 123 si->avg_vblocks = div_u64(total_vblocks, ndirty); 124 124 else ··· 198 198 199 199 si->page_mem = 0; 200 200 npages = NODE_MAPPING(sbi)->nrpages; 201 - si->page_mem += npages << PAGE_CACHE_SHIFT; 201 + si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT; 202 202 npages = META_MAPPING(sbi)->nrpages; 203 - si->page_mem += npages << PAGE_CACHE_SHIFT; 203 + si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT; 204 204 } 205 205 206 206 static int stat_show(struct seq_file *s, void *v) ··· 283 283 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, 284 284 si->bg_node_blks); 285 285 seq_puts(s, "\nExtent Cache:\n"); 286 - seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n", 286 + seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", 287 287 si->hit_largest, si->hit_cached, 288 288 si->hit_rbtree); 289 - seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n", 289 + seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", 290 290 !si->total_ext ? 
0 : 291 - (si->hit_total * 100) / si->total_ext, 291 + div64_u64(si->hit_total * 100, si->total_ext), 292 292 si->hit_total, si->total_ext); 293 293 seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n", 294 294 si->ext_tree, si->ext_node); ··· 333 333 334 334 /* memory footprint */ 335 335 update_mem_info(si->sbi); 336 - seq_printf(s, "\nMemory: %u KB\n", 336 + seq_printf(s, "\nMemory: %llu KB\n", 337 337 (si->base_mem + si->cache_mem + si->page_mem) >> 10); 338 - seq_printf(s, " - static: %u KB\n", 338 + seq_printf(s, " - static: %llu KB\n", 339 339 si->base_mem >> 10); 340 - seq_printf(s, " - cached: %u KB\n", 340 + seq_printf(s, " - cached: %llu KB\n", 341 341 si->cache_mem >> 10); 342 - seq_printf(s, " - paged : %u KB\n", 342 + seq_printf(s, " - paged : %llu KB\n", 343 343 si->page_mem >> 10); 344 344 } 345 345 mutex_unlock(&f2fs_stat_mutex); ··· 378 378 si->sbi = sbi; 379 379 sbi->stat_info = si; 380 380 381 - atomic_set(&sbi->total_hit_ext, 0); 382 - atomic_set(&sbi->read_hit_rbtree, 0); 383 - atomic_set(&sbi->read_hit_largest, 0); 384 - atomic_set(&sbi->read_hit_cached, 0); 381 + atomic64_set(&sbi->total_hit_ext, 0); 382 + atomic64_set(&sbi->read_hit_rbtree, 0); 383 + atomic64_set(&sbi->read_hit_largest, 0); 384 + atomic64_set(&sbi->read_hit_cached, 0); 385 385 386 386 atomic_set(&sbi->inline_xattr, 0); 387 387 atomic_set(&sbi->inline_inode, 0);
+13 -6
fs/f2fs/dir.c
··· 258 258 if (f2fs_has_inline_dentry(dir)) 259 259 return f2fs_parent_inline_dir(dir, p); 260 260 261 - page = get_lock_data_page(dir, 0); 261 + page = get_lock_data_page(dir, 0, false); 262 262 if (IS_ERR(page)) 263 263 return NULL; 264 264 ··· 740 740 return f2fs_empty_inline_dir(dir); 741 741 742 742 for (bidx = 0; bidx < nblock; bidx++) { 743 - dentry_page = get_lock_data_page(dir, bidx); 743 + dentry_page = get_lock_data_page(dir, bidx, false); 744 744 if (IS_ERR(dentry_page)) { 745 745 if (PTR_ERR(dentry_page) == -ENOENT) 746 746 continue; ··· 787 787 else 788 788 d_type = DT_UNKNOWN; 789 789 790 - /* encrypted case */ 791 790 de_name.name = d->filename[bit_pos]; 792 791 de_name.len = le16_to_cpu(de->name_len); 793 792 ··· 794 795 int save_len = fstr->len; 795 796 int ret; 796 797 798 + de_name.name = kmalloc(de_name.len, GFP_NOFS); 799 + if (!de_name.name) 800 + return false; 801 + 802 + memcpy(de_name.name, d->filename[bit_pos], de_name.len); 803 + 797 804 ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code, 798 805 &de_name, fstr); 799 - de_name = *fstr; 800 - fstr->len = save_len; 806 + kfree(de_name.name); 801 807 if (ret < 0) 802 808 return true; 809 + 810 + de_name = *fstr; 811 + fstr->len = save_len; 803 812 } 804 813 805 814 if (!dir_emit(ctx, de_name.name, de_name.len, ··· 854 847 min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); 855 848 856 849 for (; n < npages; n++) { 857 - dentry_page = get_lock_data_page(inode, n); 850 + dentry_page = get_lock_data_page(inode, n, false); 858 851 if (IS_ERR(dentry_page)) 859 852 continue; 860 853
+82 -125
fs/f2fs/extent_cache.c
··· 155 155 return count - et->count; 156 156 } 157 157 158 - static void __drop_largest_extent(struct inode *inode, pgoff_t fofs) 158 + static void __drop_largest_extent(struct inode *inode, 159 + pgoff_t fofs, unsigned int len) 159 160 { 160 161 struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; 161 162 162 - if (largest->fofs <= fofs && largest->fofs + largest->len > fofs) 163 + if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) 163 164 largest->len = 0; 164 165 } 165 166 ··· 169 168 if (!f2fs_may_extent_tree(inode)) 170 169 return; 171 170 172 - __drop_largest_extent(inode, fofs); 171 + __drop_largest_extent(inode, fofs, 1); 173 172 } 174 173 175 174 void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) ··· 351 350 } 352 351 353 352 if (en) { 354 - if (en->ei.len > et->largest.len) 355 - et->largest = en->ei; 353 + __try_update_largest_extent(et, en); 356 354 et->cached_en = en; 357 355 } 358 356 return en; ··· 388 388 if (!en) 389 389 return NULL; 390 390 391 - if (en->ei.len > et->largest.len) 392 - et->largest = en->ei; 391 + __try_update_largest_extent(et, en); 393 392 et->cached_en = en; 394 393 return en; 395 394 } 396 395 397 - unsigned int f2fs_update_extent_tree_range(struct inode *inode, 396 + static unsigned int f2fs_update_extent_tree_range(struct inode *inode, 398 397 pgoff_t fofs, block_t blkaddr, unsigned int len) 399 398 { 400 399 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 401 400 struct extent_tree *et = F2FS_I(inode)->extent_tree; 402 - struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; 401 + struct extent_node *en = NULL, *en1 = NULL; 403 402 struct extent_node *prev_en = NULL, *next_en = NULL; 404 403 struct extent_info ei, dei, prev; 405 404 struct rb_node **insert_p = NULL, *insert_parent = NULL; ··· 407 408 408 409 if (!et) 409 410 return false; 411 + 412 + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); 410 413 411 414 write_lock(&et->lock); 
412 415 ··· 420 419 prev = et->largest; 421 420 dei.len = 0; 422 421 423 - /* we do not guarantee that the largest extent is cached all the time */ 424 - __drop_largest_extent(inode, fofs); 422 + /* 423 + * drop largest extent before lookup, in case it's already 424 + * been shrunk from extent tree 425 + */ 426 + __drop_largest_extent(inode, fofs, len); 425 427 426 428 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ 427 429 en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en, 428 430 &insert_p, &insert_parent); 429 - if (!en) { 430 - if (next_en) { 431 - en = next_en; 432 - f2fs_bug_on(sbi, en->ei.fofs <= pos); 433 - pos = en->ei.fofs; 434 - } else { 435 - /* 436 - * skip searching in the tree since there is no 437 - * larger extent node in the cache. 438 - */ 439 - goto update_extent; 440 - } 441 - } 431 + if (!en) 432 + en = next_en; 442 433 443 434 /* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */ 444 - while (en) { 445 - struct rb_node *node; 435 + while (en && en->ei.fofs < end) { 436 + unsigned int org_end; 437 + int parts = 0; /* # of parts current extent split into */ 446 438 447 - if (pos >= end) 448 - break; 439 + next_en = en1 = NULL; 449 440 450 441 dei = en->ei; 451 - en1 = en2 = NULL; 442 + org_end = dei.fofs + dei.len; 443 + f2fs_bug_on(sbi, pos >= org_end); 452 444 453 - node = rb_next(&en->rb_node); 454 - 455 - /* 456 - * 2.1 there are four cases when we invalidate blkaddr in extent 457 - * node, |V: valid address, X: will be invalidated| 458 - */ 459 - /* case#1, invalidate right part of extent node |VVVVVXXXXX| */ 460 - if (pos > dei.fofs && end >= dei.fofs + dei.len) { 461 - en->ei.len = pos - dei.fofs; 462 - 463 - if (en->ei.len < F2FS_MIN_EXTENT_LEN) { 464 - __detach_extent_node(sbi, et, en); 465 - insert_p = NULL; 466 - insert_parent = NULL; 467 - goto update; 468 - } 469 - 470 - if (__is_extent_same(&dei, &et->largest)) 471 - et->largest = en->ei; 472 - goto next; 445 + if (pos > dei.fofs && pos - 
dei.fofs >= F2FS_MIN_EXTENT_LEN) { 446 + en->ei.len = pos - en->ei.fofs; 447 + prev_en = en; 448 + parts = 1; 473 449 } 474 450 475 - /* case#2, invalidate left part of extent node |XXXXXVVVVV| */ 476 - if (pos <= dei.fofs && end < dei.fofs + dei.len) { 477 - en->ei.fofs = end; 478 - en->ei.blk += end - dei.fofs; 479 - en->ei.len -= end - dei.fofs; 480 - 481 - if (en->ei.len < F2FS_MIN_EXTENT_LEN) { 482 - __detach_extent_node(sbi, et, en); 483 - insert_p = NULL; 484 - insert_parent = NULL; 485 - goto update; 486 - } 487 - 488 - if (__is_extent_same(&dei, &et->largest)) 489 - et->largest = en->ei; 490 - goto next; 491 - } 492 - 493 - __detach_extent_node(sbi, et, en); 494 - 495 - /* 496 - * if we remove node in rb-tree, our parent node pointer may 497 - * point the wrong place, discard them. 498 - */ 499 - insert_p = NULL; 500 - insert_parent = NULL; 501 - 502 - /* case#3, invalidate entire extent node |XXXXXXXXXX| */ 503 - if (pos <= dei.fofs && end >= dei.fofs + dei.len) { 504 - if (__is_extent_same(&dei, &et->largest)) 505 - et->largest.len = 0; 506 - goto update; 507 - } 508 - 509 - /* 510 - * case#4, invalidate data in the middle of extent node 511 - * |VVVXXXXVVV| 512 - */ 513 - if (dei.len > F2FS_MIN_EXTENT_LEN) { 514 - unsigned int endofs; 515 - 516 - /* insert left part of split extent into cache */ 517 - if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { 518 - set_extent_info(&ei, dei.fofs, dei.blk, 519 - pos - dei.fofs); 520 - en1 = __insert_extent_tree(sbi, et, &ei, 521 - NULL, NULL); 522 - } 523 - 524 - /* insert right part of split extent into cache */ 525 - endofs = dei.fofs + dei.len; 526 - if (endofs - end >= F2FS_MIN_EXTENT_LEN) { 451 + if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) { 452 + if (parts) { 527 453 set_extent_info(&ei, end, 528 454 end - dei.fofs + dei.blk, 529 - endofs - end); 530 - en2 = __insert_extent_tree(sbi, et, &ei, 531 - NULL, NULL); 455 + org_end - end); 456 + en1 = __insert_extent_tree(sbi, et, &ei, 457 + NULL, NULL); 
458 + next_en = en1; 459 + } else { 460 + en->ei.fofs = end; 461 + en->ei.blk += end - dei.fofs; 462 + en->ei.len -= end - dei.fofs; 463 + next_en = en; 532 464 } 465 + parts++; 533 466 } 534 - update: 535 - /* 2.2 update in global extent list */ 467 + 468 + if (!next_en) { 469 + struct rb_node *node = rb_next(&en->rb_node); 470 + 471 + next_en = node ? 472 + rb_entry(node, struct extent_node, rb_node) 473 + : NULL; 474 + } 475 + 476 + if (parts) 477 + __try_update_largest_extent(et, en); 478 + else 479 + __detach_extent_node(sbi, et, en); 480 + 481 + /* 482 + * if original extent is split into zero or two parts, extent 483 + * tree has been altered by deletion or insertion, therefore 484 + * invalidate pointers regard to tree. 485 + */ 486 + if (parts != 1) { 487 + insert_p = NULL; 488 + insert_parent = NULL; 489 + } 490 + 491 + /* update in global extent list */ 536 492 spin_lock(&sbi->extent_lock); 537 - if (en && !list_empty(&en->list)) 493 + if (!parts && !list_empty(&en->list)) 538 494 list_del(&en->list); 539 495 if (en1) 540 496 list_add_tail(&en1->list, &sbi->extent_list); 541 - if (en2) 542 - list_add_tail(&en2->list, &sbi->extent_list); 543 497 spin_unlock(&sbi->extent_lock); 544 498 545 - /* 2.3 release extent node */ 546 - if (en) 499 + /* release extent node */ 500 + if (!parts) 547 501 kmem_cache_free(extent_node_slab, en); 548 - next: 549 - en = node ? rb_entry(node, struct extent_node, rb_node) : NULL; 550 - next_en = en; 551 - if (en) 552 - pos = en->ei.fofs; 502 + 503 + en = next_en; 553 504 } 554 505 555 - update_extent: 556 506 /* 3. 
update extent in extent cache */ 557 507 if (blkaddr) { 558 508 struct extent_node *den = NULL; 559 509 560 510 set_extent_info(&ei, fofs, blkaddr, len); 561 - en3 = __try_merge_extent_node(sbi, et, &ei, &den, 511 + en1 = __try_merge_extent_node(sbi, et, &ei, &den, 562 512 prev_en, next_en); 563 - if (!en3) 564 - en3 = __insert_extent_tree(sbi, et, &ei, 513 + if (!en1) 514 + en1 = __insert_extent_tree(sbi, et, &ei, 565 515 insert_p, insert_parent); 566 516 567 517 /* give up extent_cache, if split and small updates happen */ ··· 524 572 } 525 573 526 574 spin_lock(&sbi->extent_lock); 527 - if (en3) { 528 - if (list_empty(&en3->list)) 529 - list_add_tail(&en3->list, &sbi->extent_list); 575 + if (en1) { 576 + if (list_empty(&en1->list)) 577 + list_add_tail(&en1->list, &sbi->extent_list); 530 578 else 531 - list_move_tail(&en3->list, &sbi->extent_list); 579 + list_move_tail(&en1->list, &sbi->extent_list); 532 580 } 533 581 if (den && !list_empty(&den->list)) 534 582 list_del(&den->list); ··· 602 650 } 603 651 spin_unlock(&sbi->extent_lock); 604 652 653 + /* 654 + * reset ino for searching victims from beginning of global extent tree. 655 + */ 656 + ino = F2FS_ROOT_INO(sbi); 657 + 605 658 while ((found = radix_tree_gang_lookup(root, 606 659 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 607 660 unsigned i; ··· 620 663 write_unlock(&et->lock); 621 664 622 665 if (node_cnt + tree_cnt >= nr_shrink) 623 - break; 666 + goto unlock_out; 624 667 } 625 668 } 626 669 unlock_out:
+72 -14
fs/f2fs/f2fs.h
··· 19 19 #include <linux/magic.h> 20 20 #include <linux/kobject.h> 21 21 #include <linux/sched.h> 22 + #include <linux/vmalloc.h> 22 23 #include <linux/bio.h> 23 24 24 25 #ifdef CONFIG_F2FS_CHECK_FS ··· 53 52 #define F2FS_MOUNT_NOBARRIER 0x00000800 54 53 #define F2FS_MOUNT_FASTBOOT 0x00001000 55 54 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 55 + #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 56 56 57 57 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 58 58 #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) ··· 124 122 (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) 125 123 #define BATCHED_TRIM_BLOCKS(sbi) \ 126 124 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) 125 + #define DEF_CP_INTERVAL 60 /* 60 secs */ 127 126 128 127 struct cp_control { 129 128 int reason; ··· 233 230 #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) 234 231 #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 235 232 #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) 233 + #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) 236 234 237 235 #define F2FS_IOC_SET_ENCRYPTION_POLICY \ 238 236 _IOR('f', 19, struct f2fs_encryption_policy) ··· 250 246 #define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ 251 247 #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ 252 248 #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ 249 + #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ 253 250 254 251 #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 255 252 /* ··· 497 492 return __is_extent_mergeable(cur, front); 498 493 } 499 494 495 + static inline void __try_update_largest_extent(struct extent_tree *et, 496 + struct extent_node *en) 497 + { 498 + if (en->ei.len > et->largest.len) 499 + et->largest = en->ei; 500 + } 501 + 500 502 struct f2fs_nm_info { 501 503 block_t nat_blkaddr; /* base disk address of NAT */ 502 504 nid_t max_nid; /* maximum possible node 
ids */ 503 505 nid_t available_nids; /* maximum available node ids */ 504 506 nid_t next_scan_nid; /* the next nid to be scanned */ 505 507 unsigned int ram_thresh; /* control the memory footprint */ 508 + unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ 506 509 507 510 /* NAT cache management */ 508 511 struct radix_tree_root nat_root;/* root of the nat entry cache */ ··· 737 724 struct rw_semaphore node_write; /* locking node writes */ 738 725 struct mutex writepages; /* mutex for writepages() */ 739 726 wait_queue_head_t cp_wait; 727 + long cp_expires, cp_interval; /* next expected periodic cp */ 740 728 741 729 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 742 730 ··· 801 787 unsigned int segment_count[2]; /* # of allocated segments */ 802 788 unsigned int block_count[2]; /* # of allocated blocks */ 803 789 atomic_t inplace_count; /* # of inplace update */ 804 - atomic_t total_hit_ext; /* # of lookup extent cache */ 805 - atomic_t read_hit_rbtree; /* # of hit rbtree extent node */ 806 - atomic_t read_hit_largest; /* # of hit largest extent node */ 807 - atomic_t read_hit_cached; /* # of hit cached extent node */ 790 + atomic64_t total_hit_ext; /* # of lookup extent cache */ 791 + atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */ 792 + atomic64_t read_hit_largest; /* # of hit largest extent node */ 793 + atomic64_t read_hit_cached; /* # of hit cached extent node */ 808 794 atomic_t inline_xattr; /* # of inline_xattr inodes */ 809 795 atomic_t inline_inode; /* # of inline_data inodes */ 810 796 atomic_t inline_dir; /* # of inline_dentry inodes */ ··· 1234 1220 return sbi->total_valid_inode_count; 1235 1221 } 1236 1222 1223 + static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, 1224 + pgoff_t index, bool for_write) 1225 + { 1226 + if (!for_write) 1227 + return grab_cache_page(mapping, index); 1228 + return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); 1229 + } 1230 + 1231 + 
static inline void f2fs_copy_page(struct page *src, struct page *dst) 1232 + { 1233 + char *src_kaddr = kmap(src); 1234 + char *dst_kaddr = kmap(dst); 1235 + 1236 + memcpy(dst_kaddr, src_kaddr, PAGE_SIZE); 1237 + kunmap(dst); 1238 + kunmap(src); 1239 + } 1240 + 1237 1241 static inline void f2fs_put_page(struct page *page, int unlock) 1238 1242 { 1239 1243 if (!page) ··· 1611 1579 return S_ISREG(mode); 1612 1580 } 1613 1581 1582 + static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) 1583 + { 1584 + void *ret; 1585 + 1586 + ret = kmalloc(size, flags | __GFP_NOWARN); 1587 + if (!ret) 1588 + ret = __vmalloc(size, flags, PAGE_KERNEL); 1589 + return ret; 1590 + } 1591 + 1592 + static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) 1593 + { 1594 + void *ret; 1595 + 1596 + ret = kzalloc(size, flags | __GFP_NOWARN); 1597 + if (!ret) 1598 + ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); 1599 + return ret; 1600 + } 1601 + 1614 1602 #define get_inode_mode(i) \ 1615 1603 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? 
\ 1616 1604 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) ··· 1773 1721 int create_flush_cmd_control(struct f2fs_sb_info *); 1774 1722 void destroy_flush_cmd_control(struct f2fs_sb_info *); 1775 1723 void invalidate_blocks(struct f2fs_sb_info *, block_t); 1724 + bool is_checkpointed_data(struct f2fs_sb_info *, block_t); 1776 1725 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1777 1726 void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); 1778 1727 void release_discard_addrs(struct f2fs_sb_info *); ··· 1792 1739 void allocate_data_block(struct f2fs_sb_info *, struct page *, 1793 1740 block_t, block_t *, struct f2fs_summary *, int); 1794 1741 void f2fs_wait_on_page_writeback(struct page *, enum page_type); 1742 + void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t); 1795 1743 void write_data_summaries(struct f2fs_sb_info *, block_t); 1796 1744 void write_node_summaries(struct f2fs_sb_info *, block_t); 1797 1745 int lookup_journal_in_cursum(struct f2fs_summary_block *, ··· 1808 1754 */ 1809 1755 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1810 1756 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1757 + struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); 1811 1758 bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); 1812 - int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); 1759 + int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); 1813 1760 void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); 1814 1761 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1815 1762 void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); ··· 1842 1787 int reserve_new_block(struct dnode_of_data *); 1843 1788 int f2fs_get_block(struct dnode_of_data *, pgoff_t); 1844 1789 int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1845 - struct page *get_read_data_page(struct inode *, pgoff_t, int); 1790 + struct page *get_read_data_page(struct 
inode *, pgoff_t, int, bool); 1846 1791 struct page *find_data_page(struct inode *, pgoff_t); 1847 - struct page *get_lock_data_page(struct inode *, pgoff_t); 1792 + struct page *get_lock_data_page(struct inode *, pgoff_t, bool); 1848 1793 struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1849 1794 int do_write_data_page(struct f2fs_io_info *); 1850 1795 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); ··· 1857 1802 int start_gc_thread(struct f2fs_sb_info *); 1858 1803 void stop_gc_thread(struct f2fs_sb_info *); 1859 1804 block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); 1860 - int f2fs_gc(struct f2fs_sb_info *); 1805 + int f2fs_gc(struct f2fs_sb_info *, bool); 1861 1806 void build_gc_manager(struct f2fs_sb_info *); 1862 1807 1863 1808 /* ··· 1875 1820 struct f2fs_sb_info *sbi; 1876 1821 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1877 1822 int main_area_segs, main_area_sections, main_area_zones; 1878 - int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext; 1823 + unsigned long long hit_largest, hit_cached, hit_rbtree; 1824 + unsigned long long hit_total, total_ext; 1879 1825 int ext_tree, ext_node; 1880 1826 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1881 1827 int nats, dirty_nats, sits, dirty_sits, fnids; ··· 1900 1844 unsigned int segment_count[2]; 1901 1845 unsigned int block_count[2]; 1902 1846 unsigned int inplace_count; 1903 - unsigned base_mem, cache_mem, page_mem; 1847 + unsigned long long base_mem, cache_mem, page_mem; 1904 1848 }; 1905 1849 1906 1850 static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ··· 1913 1857 #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1914 1858 #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1915 1859 #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) 1916 - #define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext)) 1917 - #define stat_inc_rbtree_node_hit(sbi) 
(atomic_inc(&(sbi)->read_hit_rbtree)) 1918 - #define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest)) 1919 - #define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached)) 1860 + #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) 1861 + #define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) 1862 + #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) 1863 + #define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached)) 1920 1864 #define stat_inc_inline_xattr(inode) \ 1921 1865 do { \ 1922 1866 if (f2fs_has_inline_xattr(inode)) \ ··· 2054 1998 bool f2fs_empty_inline_dir(struct inode *); 2055 1999 int f2fs_read_inline_dir(struct file *, struct dir_context *, 2056 2000 struct f2fs_str *); 2001 + int f2fs_inline_data_fiemap(struct inode *, 2002 + struct fiemap_extent_info *, __u64, __u64); 2057 2003 2058 2004 /* 2059 2005 * shrinker.c
+173 -166
fs/f2fs/file.c
··· 74 74 goto mapped; 75 75 76 76 /* page is wholly or partially inside EOF */ 77 - if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { 77 + if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) > 78 + i_size_read(inode)) { 78 79 unsigned offset; 79 80 offset = i_size_read(inode) & ~PAGE_CACHE_MASK; 80 81 zero_user_segment(page, offset, PAGE_CACHE_SIZE); ··· 87 86 mapped: 88 87 /* fill the page */ 89 88 f2fs_wait_on_page_writeback(page, DATA); 89 + 90 + /* wait for GCed encrypted page writeback */ 91 + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 92 + f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); 93 + 90 94 /* if gced page is attached, don't write to cold segment */ 91 95 clear_cold_data(page); 92 96 out: ··· 349 343 350 344 dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); 351 345 352 - for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { 346 + for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { 353 347 set_new_dnode(&dn, inode, NULL, NULL, 0); 354 348 err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); 355 349 if (err && err != -ENOENT) { ··· 510 504 return 0; 511 505 512 506 if (cache_only) { 513 - page = grab_cache_page(mapping, index); 507 + page = f2fs_grab_cache_page(mapping, index, false); 514 508 if (page && PageUptodate(page)) 515 509 goto truncate_out; 516 510 f2fs_put_page(page, 1); 517 511 return 0; 518 512 } 519 513 520 - page = get_lock_data_page(inode, index); 514 + page = get_lock_data_page(inode, index, true); 521 515 if (IS_ERR(page)) 522 516 return 0; 523 517 truncate_out: ··· 686 680 * larger than i_size. 
687 681 */ 688 682 truncate_setsize(inode, attr->ia_size); 683 + inode->i_mtime = inode->i_ctime = CURRENT_TIME; 689 684 } 690 685 } 691 686 ··· 745 738 746 739 int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 747 740 { 748 - pgoff_t index; 749 741 int err; 750 742 751 - for (index = pg_start; index < pg_end; index++) { 743 + while (pg_start < pg_end) { 752 744 struct dnode_of_data dn; 745 + pgoff_t end_offset, count; 753 746 754 747 set_new_dnode(&dn, inode, NULL, NULL, 0); 755 - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 748 + err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 756 749 if (err) { 757 - if (err == -ENOENT) 750 + if (err == -ENOENT) { 751 + pg_start++; 758 752 continue; 753 + } 759 754 return err; 760 755 } 761 756 762 - if (dn.data_blkaddr != NULL_ADDR) 763 - truncate_data_blocks_range(&dn, 1); 757 + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 758 + count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); 759 + 760 + f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); 761 + 762 + truncate_data_blocks_range(&dn, count); 764 763 f2fs_put_dnode(&dn); 764 + 765 + pg_start += count; 765 766 } 766 767 return 0; 767 768 } ··· 779 764 pgoff_t pg_start, pg_end; 780 765 loff_t off_start, off_end; 781 766 int ret = 0; 782 - 783 - if (!S_ISREG(inode->i_mode)) 784 - return -EOPNOTSUPP; 785 767 786 768 if (f2fs_has_inline_data(inode)) { 787 769 ret = f2fs_convert_inline_inode(inode); ··· 817 805 818 806 f2fs_balance_fs(sbi); 819 807 820 - blk_start = pg_start << PAGE_CACHE_SHIFT; 821 - blk_end = pg_end << PAGE_CACHE_SHIFT; 808 + blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT; 809 + blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT; 822 810 truncate_inode_pages_range(mapping, blk_start, 823 811 blk_end - 1); 824 812 ··· 831 819 return ret; 832 820 } 833 821 834 - static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) 822 + static int __exchange_data_block(struct inode *inode, 
pgoff_t src, 823 + pgoff_t dst, bool full) 835 824 { 836 825 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 837 826 struct dnode_of_data dn; 827 + block_t new_addr; 828 + bool do_replace = false; 829 + int ret; 830 + 831 + set_new_dnode(&dn, inode, NULL, NULL, 0); 832 + ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA); 833 + if (ret && ret != -ENOENT) { 834 + return ret; 835 + } else if (ret == -ENOENT) { 836 + new_addr = NULL_ADDR; 837 + } else { 838 + new_addr = dn.data_blkaddr; 839 + if (!is_checkpointed_data(sbi, new_addr)) { 840 + dn.data_blkaddr = NULL_ADDR; 841 + /* do not invalidate this block address */ 842 + set_data_blkaddr(&dn); 843 + f2fs_update_extent_cache(&dn); 844 + do_replace = true; 845 + } 846 + f2fs_put_dnode(&dn); 847 + } 848 + 849 + if (new_addr == NULL_ADDR) 850 + return full ? truncate_hole(inode, dst, dst + 1) : 0; 851 + 852 + if (do_replace) { 853 + struct page *ipage = get_node_page(sbi, inode->i_ino); 854 + struct node_info ni; 855 + 856 + if (IS_ERR(ipage)) { 857 + ret = PTR_ERR(ipage); 858 + goto err_out; 859 + } 860 + 861 + set_new_dnode(&dn, inode, ipage, NULL, 0); 862 + ret = f2fs_reserve_block(&dn, dst); 863 + if (ret) 864 + goto err_out; 865 + 866 + truncate_data_blocks_range(&dn, 1); 867 + 868 + get_node_info(sbi, dn.nid, &ni); 869 + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, 870 + ni.version, true); 871 + f2fs_put_dnode(&dn); 872 + } else { 873 + struct page *psrc, *pdst; 874 + 875 + psrc = get_lock_data_page(inode, src, true); 876 + if (IS_ERR(psrc)) 877 + return PTR_ERR(psrc); 878 + pdst = get_new_data_page(inode, NULL, dst, false); 879 + if (IS_ERR(pdst)) { 880 + f2fs_put_page(psrc, 1); 881 + return PTR_ERR(pdst); 882 + } 883 + f2fs_copy_page(psrc, pdst); 884 + set_page_dirty(pdst); 885 + f2fs_put_page(pdst, 1); 886 + f2fs_put_page(psrc, 1); 887 + 888 + return truncate_hole(inode, src, src + 1); 889 + } 890 + return 0; 891 + 892 + err_out: 893 + if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) { 894 + 
dn.data_blkaddr = new_addr; 895 + set_data_blkaddr(&dn); 896 + f2fs_update_extent_cache(&dn); 897 + f2fs_put_dnode(&dn); 898 + } 899 + return ret; 900 + } 901 + 902 + static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) 903 + { 904 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 838 905 pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; 839 906 int ret = 0; 840 907 841 908 for (; end < nrpages; start++, end++) { 842 - block_t new_addr, old_addr; 843 - 909 + f2fs_balance_fs(sbi); 844 910 f2fs_lock_op(sbi); 845 - 846 - set_new_dnode(&dn, inode, NULL, NULL, 0); 847 - ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); 848 - if (ret && ret != -ENOENT) { 849 - goto out; 850 - } else if (ret == -ENOENT) { 851 - new_addr = NULL_ADDR; 852 - } else { 853 - new_addr = dn.data_blkaddr; 854 - truncate_data_blocks_range(&dn, 1); 855 - f2fs_put_dnode(&dn); 856 - } 857 - 858 - if (new_addr == NULL_ADDR) { 859 - set_new_dnode(&dn, inode, NULL, NULL, 0); 860 - ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); 861 - if (ret && ret != -ENOENT) { 862 - goto out; 863 - } else if (ret == -ENOENT) { 864 - f2fs_unlock_op(sbi); 865 - continue; 866 - } 867 - 868 - if (dn.data_blkaddr == NULL_ADDR) { 869 - f2fs_put_dnode(&dn); 870 - f2fs_unlock_op(sbi); 871 - continue; 872 - } else { 873 - truncate_data_blocks_range(&dn, 1); 874 - } 875 - 876 - f2fs_put_dnode(&dn); 877 - } else { 878 - struct page *ipage; 879 - 880 - ipage = get_node_page(sbi, inode->i_ino); 881 - if (IS_ERR(ipage)) { 882 - ret = PTR_ERR(ipage); 883 - goto out; 884 - } 885 - 886 - set_new_dnode(&dn, inode, ipage, NULL, 0); 887 - ret = f2fs_reserve_block(&dn, start); 888 - if (ret) 889 - goto out; 890 - 891 - old_addr = dn.data_blkaddr; 892 - if (old_addr != NEW_ADDR && new_addr == NEW_ADDR) { 893 - dn.data_blkaddr = NULL_ADDR; 894 - f2fs_update_extent_cache(&dn); 895 - invalidate_blocks(sbi, old_addr); 896 - 897 - dn.data_blkaddr = new_addr; 898 - set_data_blkaddr(&dn); 899 - } else 
if (new_addr != NEW_ADDR) { 900 - struct node_info ni; 901 - 902 - get_node_info(sbi, dn.nid, &ni); 903 - f2fs_replace_block(sbi, &dn, old_addr, new_addr, 904 - ni.version, true); 905 - } 906 - 907 - f2fs_put_dnode(&dn); 908 - } 911 + ret = __exchange_data_block(inode, end, start, true); 909 912 f2fs_unlock_op(sbi); 913 + if (ret) 914 + break; 910 915 } 911 - return 0; 912 - out: 913 - f2fs_unlock_op(sbi); 914 916 return ret; 915 917 } 916 918 ··· 933 907 pgoff_t pg_start, pg_end; 934 908 loff_t new_size; 935 909 int ret; 936 - 937 - if (!S_ISREG(inode->i_mode)) 938 - return -EINVAL; 939 910 940 911 if (offset + len >= i_size_read(inode)) 941 912 return -EINVAL; ··· 963 940 if (ret) 964 941 return ret; 965 942 943 + /* write out all moved pages, if possible */ 944 + filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 945 + truncate_pagecache(inode, offset); 946 + 966 947 new_size = i_size_read(inode) - len; 948 + truncate_pagecache(inode, new_size); 967 949 968 950 ret = truncate_blocks(inode, new_size, true); 969 951 if (!ret) ··· 986 958 loff_t new_size = i_size_read(inode); 987 959 loff_t off_start, off_end; 988 960 int ret = 0; 989 - 990 - if (!S_ISREG(inode->i_mode)) 991 - return -EINVAL; 992 961 993 962 ret = inode_newsize_ok(inode, (len + offset)); 994 963 if (ret) ··· 1028 1003 return ret; 1029 1004 1030 1005 new_size = max_t(loff_t, new_size, 1031 - pg_start << PAGE_CACHE_SHIFT); 1006 + (loff_t)pg_start << PAGE_CACHE_SHIFT); 1032 1007 } 1033 1008 1034 1009 for (index = pg_start; index < pg_end; index++) { ··· 1064 1039 f2fs_unlock_op(sbi); 1065 1040 1066 1041 new_size = max_t(loff_t, new_size, 1067 - (index + 1) << PAGE_CACHE_SHIFT); 1042 + (loff_t)(index + 1) << PAGE_CACHE_SHIFT); 1068 1043 } 1069 1044 1070 1045 if (off_end) { ··· 1091 1066 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1092 1067 pgoff_t pg_start, pg_end, delta, nrpages, idx; 1093 1068 loff_t new_size; 1094 - int ret; 1095 - 1096 - if (!S_ISREG(inode->i_mode)) 1097 - return 
-EINVAL; 1069 + int ret = 0; 1098 1070 1099 1071 new_size = i_size_read(inode) + len; 1100 1072 if (new_size > inode->i_sb->s_maxbytes) ··· 1129 1107 nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; 1130 1108 1131 1109 for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { 1132 - struct dnode_of_data dn; 1133 - struct page *ipage; 1134 - block_t new_addr, old_addr; 1135 - 1136 1110 f2fs_lock_op(sbi); 1137 - 1138 - set_new_dnode(&dn, inode, NULL, NULL, 0); 1139 - ret = get_dnode_of_data(&dn, idx, LOOKUP_NODE_RA); 1140 - if (ret && ret != -ENOENT) { 1141 - goto out; 1142 - } else if (ret == -ENOENT) { 1143 - goto next; 1144 - } else if (dn.data_blkaddr == NULL_ADDR) { 1145 - f2fs_put_dnode(&dn); 1146 - goto next; 1147 - } else { 1148 - new_addr = dn.data_blkaddr; 1149 - truncate_data_blocks_range(&dn, 1); 1150 - f2fs_put_dnode(&dn); 1151 - } 1152 - 1153 - ipage = get_node_page(sbi, inode->i_ino); 1154 - if (IS_ERR(ipage)) { 1155 - ret = PTR_ERR(ipage); 1156 - goto out; 1157 - } 1158 - 1159 - set_new_dnode(&dn, inode, ipage, NULL, 0); 1160 - ret = f2fs_reserve_block(&dn, idx + delta); 1161 - if (ret) 1162 - goto out; 1163 - 1164 - old_addr = dn.data_blkaddr; 1165 - f2fs_bug_on(sbi, old_addr != NEW_ADDR); 1166 - 1167 - if (new_addr != NEW_ADDR) { 1168 - struct node_info ni; 1169 - 1170 - get_node_info(sbi, dn.nid, &ni); 1171 - f2fs_replace_block(sbi, &dn, old_addr, new_addr, 1172 - ni.version, true); 1173 - } 1174 - f2fs_put_dnode(&dn); 1175 - next: 1111 + ret = __exchange_data_block(inode, idx, idx + delta, false); 1176 1112 f2fs_unlock_op(sbi); 1113 + if (ret) 1114 + break; 1177 1115 } 1178 1116 1179 - i_size_write(inode, new_size); 1180 - return 0; 1181 - out: 1182 - f2fs_unlock_op(sbi); 1117 + /* write out all moved pages, if possible */ 1118 + filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1119 + truncate_pagecache(inode, offset); 1120 + 1121 + if (!ret) 1122 + i_size_write(inode, new_size); 1183 1123 return ret; 1184 1124 } 
1185 1125 ··· 1188 1204 if (pg_start == pg_end) 1189 1205 new_size = offset + len; 1190 1206 else if (index == pg_start && off_start) 1191 - new_size = (index + 1) << PAGE_CACHE_SHIFT; 1207 + new_size = (loff_t)(index + 1) << PAGE_CACHE_SHIFT; 1192 1208 else if (index == pg_end) 1193 - new_size = (index << PAGE_CACHE_SHIFT) + off_end; 1209 + new_size = ((loff_t)index << PAGE_CACHE_SHIFT) + 1210 + off_end; 1194 1211 else 1195 1212 new_size += PAGE_CACHE_SIZE; 1196 1213 } ··· 1212 1227 { 1213 1228 struct inode *inode = file_inode(file); 1214 1229 long ret = 0; 1230 + 1231 + /* f2fs only support ->fallocate for regular file */ 1232 + if (!S_ISREG(inode->i_mode)) 1233 + return -EINVAL; 1215 1234 1216 1235 if (f2fs_encrypted_inode(inode) && 1217 1236 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) ··· 1426 1437 if (!f2fs_is_first_block_written(inode)) 1427 1438 return truncate_partial_data_page(inode, 0, true); 1428 1439 1429 - punch_hole(inode, 0, F2FS_BLKSIZE); 1430 - return 0; 1440 + return punch_hole(inode, 0, F2FS_BLKSIZE); 1431 1441 } 1432 1442 1433 1443 static int f2fs_ioc_abort_volatile_write(struct file *filp) ··· 1443 1455 1444 1456 f2fs_balance_fs(F2FS_I_SB(inode)); 1445 1457 1446 - if (f2fs_is_atomic_file(inode)) { 1447 - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1448 - commit_inmem_pages(inode, true); 1449 - } 1450 - 1451 - if (f2fs_is_volatile_file(inode)) 1452 - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1458 + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1459 + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1460 + commit_inmem_pages(inode, true); 1453 1461 1454 1462 mnt_drop_write_file(filp); 1455 1463 return ret; ··· 1478 1494 f2fs_stop_checkpoint(sbi); 1479 1495 break; 1480 1496 case F2FS_GOING_DOWN_NOSYNC: 1497 + f2fs_stop_checkpoint(sbi); 1498 + break; 1499 + case F2FS_GOING_DOWN_METAFLUSH: 1500 + sync_meta_pages(sbi, META, LONG_MAX); 1481 1501 f2fs_stop_checkpoint(sbi); 1482 1502 break; 1483 1503 default: ··· 
1604 1616 { 1605 1617 struct inode *inode = file_inode(filp); 1606 1618 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1607 - __u32 i, count; 1619 + __u32 sync; 1608 1620 1609 1621 if (!capable(CAP_SYS_ADMIN)) 1610 1622 return -EPERM; 1611 1623 1612 - if (get_user(count, (__u32 __user *)arg)) 1624 + if (get_user(sync, (__u32 __user *)arg)) 1613 1625 return -EFAULT; 1614 1626 1615 - if (!count || count > F2FS_BATCH_GC_MAX_NUM) 1616 - return -EINVAL; 1627 + if (f2fs_readonly(sbi->sb)) 1628 + return -EROFS; 1617 1629 1618 - for (i = 0; i < count; i++) { 1630 + if (!sync) { 1619 1631 if (!mutex_trylock(&sbi->gc_mutex)) 1620 - break; 1621 - 1622 - if (f2fs_gc(sbi)) 1623 - break; 1632 + return -EBUSY; 1633 + } else { 1634 + mutex_lock(&sbi->gc_mutex); 1624 1635 } 1625 1636 1626 - if (put_user(i, (__u32 __user *)arg)) 1627 - return -EFAULT; 1637 + return f2fs_gc(sbi, sync); 1638 + } 1639 + 1640 + static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) 1641 + { 1642 + struct inode *inode = file_inode(filp); 1643 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1644 + struct cp_control cpc; 1645 + 1646 + if (!capable(CAP_SYS_ADMIN)) 1647 + return -EPERM; 1648 + 1649 + if (f2fs_readonly(sbi->sb)) 1650 + return -EROFS; 1651 + 1652 + cpc.reason = __get_cp_reason(sbi); 1653 + 1654 + mutex_lock(&sbi->gc_mutex); 1655 + write_checkpoint(sbi, &cpc); 1656 + mutex_unlock(&sbi->gc_mutex); 1628 1657 1629 1658 return 0; 1630 1659 } ··· 1677 1672 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 1678 1673 case F2FS_IOC_GARBAGE_COLLECT: 1679 1674 return f2fs_ioc_gc(filp, arg); 1675 + case F2FS_IOC_WRITE_CHECKPOINT: 1676 + return f2fs_ioc_write_checkpoint(filp, arg); 1680 1677 default: 1681 1678 return -ENOTTY; 1682 1679 }
+55 -22
fs/f2fs/gc.c
··· 78 78 stat_inc_bggc_count(sbi); 79 79 80 80 /* if return value is not zero, no victim was selected */ 81 - if (f2fs_gc(sbi)) 81 + if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC))) 82 82 wait_ms = gc_th->no_gc_sleep_time; 83 + 84 + trace_f2fs_background_gc(sbi->sb, wait_ms, 85 + prefree_segments(sbi), free_segments(sbi)); 83 86 84 87 /* balancing f2fs's metadata periodically */ 85 88 f2fs_balance_fs_bg(sbi); ··· 260 257 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 261 258 struct victim_sel_policy p; 262 259 unsigned int secno, max_cost; 260 + unsigned int last_segment = MAIN_SEGS(sbi); 263 261 int nsearched = 0; 264 262 265 263 mutex_lock(&dirty_i->seglist_lock); ··· 270 266 271 267 p.min_segno = NULL_SEGNO; 272 268 p.min_cost = max_cost = get_max_cost(sbi, &p); 269 + 270 + if (p.max_search == 0) 271 + goto out; 273 272 274 273 if (p.alloc_mode == LFS && gc_type == FG_GC) { 275 274 p.min_segno = check_bg_victims(sbi); ··· 284 277 unsigned long cost; 285 278 unsigned int segno; 286 279 287 - segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset); 288 - if (segno >= MAIN_SEGS(sbi)) { 280 + segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); 281 + if (segno >= last_segment) { 289 282 if (sbi->last_victim[p.gc_mode]) { 283 + last_segment = sbi->last_victim[p.gc_mode]; 290 284 sbi->last_victim[p.gc_mode] = 0; 291 285 p.offset = 0; 292 286 continue; ··· 335 327 sbi->cur_victim_sec, 336 328 prefree_segments(sbi), free_segments(sbi)); 337 329 } 330 + out: 338 331 mutex_unlock(&dirty_i->seglist_lock); 339 332 340 333 return (p.min_segno == NULL_SEGNO) ? 
0 : 1; ··· 550 541 int err; 551 542 552 543 /* do not read out */ 553 - page = grab_cache_page(inode->i_mapping, bidx); 544 + page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); 554 545 if (!page) 555 546 return; 556 547 ··· 559 550 if (err) 560 551 goto out; 561 552 562 - if (unlikely(dn.data_blkaddr == NULL_ADDR)) 553 + if (unlikely(dn.data_blkaddr == NULL_ADDR)) { 554 + ClearPageUptodate(page); 563 555 goto put_out; 556 + } 557 + 558 + /* 559 + * don't cache encrypted data into meta inode until previous dirty 560 + * data were writebacked to avoid racing between GC and flush. 561 + */ 562 + f2fs_wait_on_page_writeback(page, DATA); 564 563 565 564 get_node_info(fio.sbi, dn.nid, &ni); 566 565 set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); ··· 597 580 goto put_page_out; 598 581 599 582 set_page_dirty(fio.encrypted_page); 600 - f2fs_wait_on_page_writeback(fio.encrypted_page, META); 583 + f2fs_wait_on_page_writeback(fio.encrypted_page, DATA); 601 584 if (clear_page_dirty_for_io(fio.encrypted_page)) 602 585 dec_page_count(fio.sbi, F2FS_DIRTY_META); 603 586 ··· 628 611 { 629 612 struct page *page; 630 613 631 - page = get_lock_data_page(inode, bidx); 614 + page = get_lock_data_page(inode, bidx, true); 632 615 if (IS_ERR(page)) 633 616 return; 634 617 ··· 722 705 723 706 start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); 724 707 data_page = get_read_data_page(inode, 725 - start_bidx + ofs_in_node, READA); 708 + start_bidx + ofs_in_node, READA, true); 726 709 if (IS_ERR(data_page)) { 727 710 iput(inode); 728 711 continue; ··· 814 797 return nfree; 815 798 } 816 799 817 - int f2fs_gc(struct f2fs_sb_info *sbi) 800 + int f2fs_gc(struct f2fs_sb_info *sbi, bool sync) 818 801 { 819 - unsigned int segno = NULL_SEGNO; 820 - unsigned int i; 821 - int gc_type = BG_GC; 822 - int nfree = 0; 823 - int ret = -1; 802 + unsigned int segno, i; 803 + int gc_type = sync ? 
FG_GC : BG_GC; 804 + int sec_freed = 0; 805 + int ret = -EINVAL; 824 806 struct cp_control cpc; 825 807 struct gc_inode_list gc_list = { 826 808 .ilist = LIST_HEAD_INIT(gc_list.ilist), ··· 828 812 829 813 cpc.reason = __get_cp_reason(sbi); 830 814 gc_more: 815 + segno = NULL_SEGNO; 816 + 831 817 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 832 818 goto stop; 833 819 if (unlikely(f2fs_cp_error(sbi))) 834 820 goto stop; 835 821 836 - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 822 + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) { 837 823 gc_type = FG_GC; 838 824 if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi)) 839 825 write_checkpoint(sbi, &cpc); ··· 848 830 /* readahead multi ssa blocks those have contiguous address */ 849 831 if (sbi->segs_per_sec > 1) 850 832 ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec, 851 - META_SSA); 833 + META_SSA, true); 852 834 853 - for (i = 0; i < sbi->segs_per_sec; i++) 854 - nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type); 835 + for (i = 0; i < sbi->segs_per_sec; i++) { 836 + /* 837 + * for FG_GC case, halt gcing left segments once failed one 838 + * of segments in selected section to avoid long latency. 839 + */ 840 + if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) && 841 + gc_type == FG_GC) 842 + break; 843 + } 844 + 845 + if (i == sbi->segs_per_sec && gc_type == FG_GC) 846 + sec_freed++; 855 847 856 848 if (gc_type == FG_GC) 857 849 sbi->cur_victim_sec = NULL_SEGNO; 858 850 859 - if (has_not_enough_free_secs(sbi, nfree)) 860 - goto gc_more; 851 + if (!sync) { 852 + if (has_not_enough_free_secs(sbi, sec_freed)) 853 + goto gc_more; 861 854 862 - if (gc_type == FG_GC) 863 - write_checkpoint(sbi, &cpc); 855 + if (gc_type == FG_GC) 856 + write_checkpoint(sbi, &cpc); 857 + } 864 858 stop: 865 859 mutex_unlock(&sbi->gc_mutex); 866 860 867 861 put_gc_inode(&gc_list); 862 + 863 + if (sync) 864 + ret = sec_freed ? 
0 : -EAGAIN; 868 865 return ret; 869 866 } 870 867
-6
fs/f2fs/gc.h
··· 19 19 #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ 20 20 #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 21 21 22 - /* 23 - * with this macro, we can control the max time we do garbage collection, 24 - * when user triggers batch mode gc by ioctl. 25 - */ 26 - #define F2FS_BATCH_GC_MAX_NUM 16 27 - 28 22 /* Search max. number of dirty segments to select a victim segment */ 29 23 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 30 24
+40 -2
fs/f2fs/inline.c
··· 12 12 #include <linux/f2fs_fs.h> 13 13 14 14 #include "f2fs.h" 15 + #include "node.h" 15 16 16 17 bool f2fs_may_inline_data(struct inode *inode) 17 18 { ··· 275 274 if (f2fs_has_inline_data(inode)) { 276 275 ipage = get_node_page(sbi, inode->i_ino); 277 276 f2fs_bug_on(sbi, IS_ERR(ipage)); 278 - truncate_inline_inode(ipage, 0); 277 + if (!truncate_inline_inode(ipage, 0)) 278 + return false; 279 279 f2fs_clear_inline_inode(inode); 280 280 update_inode(inode, ipage); 281 281 f2fs_put_page(ipage, 1); 282 282 } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { 283 - truncate_blocks(inode, 0, false); 283 + if (truncate_blocks(inode, 0, false)) 284 + return false; 284 285 goto process_inline; 285 286 } 286 287 return false; ··· 570 567 571 568 f2fs_put_page(ipage, 1); 572 569 return 0; 570 + } 571 + 572 + int f2fs_inline_data_fiemap(struct inode *inode, 573 + struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) 574 + { 575 + __u64 byteaddr, ilen; 576 + __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | 577 + FIEMAP_EXTENT_LAST; 578 + struct node_info ni; 579 + struct page *ipage; 580 + int err = 0; 581 + 582 + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 583 + if (IS_ERR(ipage)) 584 + return PTR_ERR(ipage); 585 + 586 + if (!f2fs_has_inline_data(inode)) { 587 + err = -EAGAIN; 588 + goto out; 589 + } 590 + 591 + ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode)); 592 + if (start >= ilen) 593 + goto out; 594 + if (start + len < ilen) 595 + ilen = start + len; 596 + ilen -= start; 597 + 598 + get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); 599 + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; 600 + byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage); 601 + err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); 602 + out: 603 + f2fs_put_page(ipage, 1); 604 + return err; 573 605 }
+2 -6
fs/f2fs/inode.c
··· 296 296 return 0; 297 297 298 298 /* 299 - * We need to lock here to prevent from producing dirty node pages 299 + * We need to balance fs here to prevent from producing dirty node pages 300 300 * during the urgent cleaning time when runing out of free sections. 301 301 */ 302 - f2fs_lock_op(sbi); 303 302 update_inode_page(inode); 304 - f2fs_unlock_op(sbi); 305 303 306 - if (wbc) 307 - f2fs_balance_fs(sbi); 308 - 304 + f2fs_balance_fs(sbi); 309 305 return 0; 310 306 } 311 307
+15 -4
fs/f2fs/namei.c
··· 410 410 * If the symlink path is stored into inline_data, there is no 411 411 * performance regression. 412 412 */ 413 - if (!err) 413 + if (!err) { 414 414 filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1); 415 415 416 - if (IS_DIRSYNC(dir)) 417 - f2fs_sync_fs(sbi->sb, 1); 416 + if (IS_DIRSYNC(dir)) 417 + f2fs_sync_fs(sbi->sb, 1); 418 + } else { 419 + f2fs_unlink(dir, dentry); 420 + } 418 421 419 422 kfree(sd); 420 423 f2fs_fname_crypto_free_buffer(&disk_link); ··· 950 947 951 948 /* Symlink is encrypted */ 952 949 sd = (struct f2fs_encrypted_symlink_data *)caddr; 953 - cstr.name = sd->encrypted_path; 954 950 cstr.len = le16_to_cpu(sd->len); 951 + cstr.name = kmalloc(cstr.len, GFP_NOFS); 952 + if (!cstr.name) { 953 + res = -ENOMEM; 954 + goto errout; 955 + } 956 + memcpy(cstr.name, sd->encrypted_path, cstr.len); 955 957 956 958 /* this is broken symlink case */ 957 959 if (cstr.name[0] == 0 && cstr.len == 0) { ··· 978 970 if (res < 0) 979 971 goto errout; 980 972 973 + kfree(cstr.name); 974 + 981 975 paddr = pstr.name; 982 976 983 977 /* Null-terminate the name */ ··· 989 979 page_cache_release(cpage); 990 980 return *cookie = paddr; 991 981 errout: 982 + kfree(cstr.name); 992 983 f2fs_fname_crypto_free_buffer(&pstr); 993 984 kunmap(cpage); 994 985 page_cache_release(cpage);
+16 -10
fs/f2fs/node.c
··· 1323 1323 nid = nid_of_node(page); 1324 1324 f2fs_bug_on(sbi, page->index != nid); 1325 1325 1326 + if (wbc->for_reclaim) { 1327 + if (!down_read_trylock(&sbi->node_write)) 1328 + goto redirty_out; 1329 + } else { 1330 + down_read(&sbi->node_write); 1331 + } 1332 + 1326 1333 get_node_info(sbi, nid, &ni); 1327 1334 1328 1335 /* This page is already truncated */ 1329 1336 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1330 1337 ClearPageUptodate(page); 1331 1338 dec_page_count(sbi, F2FS_DIRTY_NODES); 1339 + up_read(&sbi->node_write); 1332 1340 unlock_page(page); 1333 1341 return 0; 1334 - } 1335 - 1336 - if (wbc->for_reclaim) { 1337 - if (!down_read_trylock(&sbi->node_write)) 1338 - goto redirty_out; 1339 - } else { 1340 - down_read(&sbi->node_write); 1341 1342 } 1342 1343 1343 1344 set_page_writeback(page); ··· 1529 1528 return; 1530 1529 1531 1530 /* readahead nat pages to be scanned */ 1532 - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT); 1531 + ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 1532 + META_NAT, true); 1533 1533 1534 1534 while (1) { 1535 1535 struct page *page = get_current_nat_page(sbi, nid); ··· 1560 1558 remove_free_nid(nm_i, nid); 1561 1559 } 1562 1560 mutex_unlock(&curseg->curseg_mutex); 1561 + 1562 + ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), 1563 + nm_i->ra_nid_pages, META_NAT, false); 1563 1564 } 1564 1565 1565 1566 /* ··· 1808 1803 nrpages = min(last_offset - i, bio_blocks); 1809 1804 1810 1805 /* readahead node pages */ 1811 - ra_meta_pages(sbi, addr, nrpages, META_POR); 1806 + ra_meta_pages(sbi, addr, nrpages, META_POR, true); 1812 1807 1813 1808 for (idx = addr; idx < addr + nrpages; idx++) { 1814 - struct page *page = get_meta_page(sbi, idx); 1809 + struct page *page = get_tmp_page(sbi, idx); 1815 1810 1816 1811 rn = F2FS_NODE(page); 1817 1812 sum_entry->nid = rn->footer.nid; ··· 2005 2000 nm_i->fcnt = 0; 2006 2001 nm_i->nat_cnt = 0; 2007 2002 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 2003 + 
nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 2008 2004 2009 2005 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 2010 2006 INIT_LIST_HEAD(&nm_i->free_nid_list);
+3 -1
fs/f2fs/node.h
··· 14 14 /* node block offset on the NAT area dedicated to the given start node id */ 15 15 #define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) 16 16 17 - /* # of pages to perform readahead before building free nids */ 17 + /* # of pages to perform synchronous readahead before building free nids */ 18 18 #define FREE_NID_PAGES 4 19 + 20 + #define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */ 19 21 20 22 /* maximum readahead size for node during getting data blocks */ 21 23 #define MAX_RA_NODE 128
+5 -10
fs/f2fs/recovery.c
··· 180 180 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 181 181 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 182 182 183 - ra_meta_pages(sbi, blkaddr, 1, META_POR); 183 + ra_meta_pages(sbi, blkaddr, 1, META_POR, true); 184 184 185 185 while (1) { 186 186 struct fsync_inode_entry *entry; ··· 188 188 if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) 189 189 return 0; 190 190 191 - page = get_meta_page(sbi, blkaddr); 191 + page = get_tmp_page(sbi, blkaddr); 192 192 193 193 if (cp_ver != cpver_of_node(page)) 194 194 break; ··· 383 383 start = start_bidx_of_node(ofs_of_node(page), fi); 384 384 end = start + ADDRS_PER_PAGE(page, fi); 385 385 386 - f2fs_lock_op(sbi); 387 - 388 386 set_new_dnode(&dn, inode, NULL, NULL, 0); 389 387 390 388 err = get_dnode_of_data(&dn, start, ALLOC_NODE); 391 - if (err) { 392 - f2fs_unlock_op(sbi); 389 + if (err) 393 390 goto out; 394 - } 395 391 396 392 f2fs_wait_on_page_writeback(dn.node_page, NODE); 397 393 ··· 452 456 set_page_dirty(dn.node_page); 453 457 err: 454 458 f2fs_put_dnode(&dn); 455 - f2fs_unlock_op(sbi); 456 459 out: 457 460 f2fs_msg(sbi->sb, KERN_NOTICE, 458 461 "recover_data: ino = %lx, recovered = %d blocks, err = %d", ··· 480 485 481 486 ra_meta_pages_cond(sbi, blkaddr); 482 487 483 - page = get_meta_page(sbi, blkaddr); 488 + page = get_tmp_page(sbi, blkaddr); 484 489 485 490 if (cp_ver != cpver_of_node(page)) { 486 491 f2fs_put_page(page, 1); ··· 565 570 566 571 /* truncate meta pages to be used by the recovery */ 567 572 truncate_inode_pages_range(META_MAPPING(sbi), 568 - MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); 573 + (loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); 569 574 570 575 if (err) { 571 576 truncate_inode_pages_final(NODE_MAPPING(sbi));
+126 -80
fs/f2fs/segment.c
··· 14 14 #include <linux/blkdev.h> 15 15 #include <linux/prefetch.h> 16 16 #include <linux/kthread.h> 17 - #include <linux/vmalloc.h> 18 17 #include <linux/swap.h> 18 + #include <linux/timer.h> 19 19 20 20 #include "f2fs.h" 21 21 #include "segment.h" ··· 29 29 static struct kmem_cache *sit_entry_set_slab; 30 30 static struct kmem_cache *inmem_entry_slab; 31 31 32 + static unsigned long __reverse_ulong(unsigned char *str) 33 + { 34 + unsigned long tmp = 0; 35 + int shift = 24, idx = 0; 36 + 37 + #if BITS_PER_LONG == 64 38 + shift = 56; 39 + #endif 40 + while (shift >= 0) { 41 + tmp |= (unsigned long)str[idx++] << shift; 42 + shift -= BITS_PER_BYTE; 43 + } 44 + return tmp; 45 + } 46 + 32 47 /* 33 48 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 34 49 * MSB and LSB are reversed in a byte by f2fs_set_bit. ··· 53 38 int num = 0; 54 39 55 40 #if BITS_PER_LONG == 64 56 - if ((word & 0xffffffff) == 0) { 41 + if ((word & 0xffffffff00000000UL) == 0) 57 42 num += 32; 43 + else 58 44 word >>= 32; 59 - } 60 45 #endif 61 - if ((word & 0xffff) == 0) { 46 + if ((word & 0xffff0000) == 0) 62 47 num += 16; 48 + else 63 49 word >>= 16; 64 - } 65 - if ((word & 0xff) == 0) { 50 + 51 + if ((word & 0xff00) == 0) 66 52 num += 8; 53 + else 67 54 word >>= 8; 68 - } 55 + 69 56 if ((word & 0xf0) == 0) 70 57 num += 4; 71 58 else 72 59 word >>= 4; 60 + 73 61 if ((word & 0xc) == 0) 74 62 num += 2; 75 63 else 76 64 word >>= 2; 65 + 77 66 if ((word & 0x2) == 0) 78 67 num += 1; 79 68 return num; ··· 87 68 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because 88 69 * f2fs_set_bit makes MSB and LSB reversed in a byte. 
89 70 * Example: 90 - * LSB <--> MSB 91 - * f2fs_set_bit(0, bitmap) => 0000 0001 92 - * f2fs_set_bit(7, bitmap) => 1000 0000 71 + * MSB <--> LSB 72 + * f2fs_set_bit(0, bitmap) => 1000 0000 73 + * f2fs_set_bit(7, bitmap) => 0000 0001 93 74 */ 94 75 static unsigned long __find_rev_next_bit(const unsigned long *addr, 95 76 unsigned long size, unsigned long offset) 96 77 { 97 - while (!f2fs_test_bit(offset, (unsigned char *)addr)) 98 - offset++; 99 - 100 - if (offset > size) 101 - offset = size; 102 - 103 - return offset; 104 - #if 0 105 78 const unsigned long *p = addr + BIT_WORD(offset); 106 79 unsigned long result = offset & ~(BITS_PER_LONG - 1); 107 80 unsigned long tmp; 108 - unsigned long mask, submask; 109 - unsigned long quot, rest; 110 81 111 82 if (offset >= size) 112 83 return size; ··· 106 97 if (!offset) 107 98 goto aligned; 108 99 109 - tmp = *(p++); 110 - quot = (offset >> 3) << 3; 111 - rest = offset & 0x7; 112 - mask = ~0UL << quot; 113 - submask = (unsigned char)(0xff << rest) >> rest; 114 - submask <<= quot; 115 - mask &= submask; 116 - tmp &= mask; 100 + tmp = __reverse_ulong((unsigned char *)p); 101 + tmp &= ~0UL >> offset; 102 + 117 103 if (size < BITS_PER_LONG) 118 104 goto found_first; 119 105 if (tmp) ··· 116 112 117 113 size -= BITS_PER_LONG; 118 114 result += BITS_PER_LONG; 115 + p++; 119 116 aligned: 120 117 while (size & ~(BITS_PER_LONG-1)) { 121 - tmp = *(p++); 118 + tmp = __reverse_ulong((unsigned char *)p); 122 119 if (tmp) 123 120 goto found_middle; 124 121 result += BITS_PER_LONG; 125 122 size -= BITS_PER_LONG; 123 + p++; 126 124 } 127 125 if (!size) 128 126 return result; 129 - tmp = *p; 127 + 128 + tmp = __reverse_ulong((unsigned char *)p); 130 129 found_first: 131 - tmp &= (~0UL >> (BITS_PER_LONG - size)); 132 - if (tmp == 0UL) /* Are any bits set? */ 130 + tmp &= (~0UL << (BITS_PER_LONG - size)); 131 + if (!tmp) /* Are any bits set? */ 133 132 return result + size; /* Nope. 
*/ 134 133 found_middle: 135 134 return result + __reverse_ffs(tmp); 136 - #endif 137 135 } 138 136 139 137 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, 140 138 unsigned long size, unsigned long offset) 141 139 { 142 - while (f2fs_test_bit(offset, (unsigned char *)addr)) 143 - offset++; 144 - 145 - if (offset > size) 146 - offset = size; 147 - 148 - return offset; 149 - #if 0 150 140 const unsigned long *p = addr + BIT_WORD(offset); 151 141 unsigned long result = offset & ~(BITS_PER_LONG - 1); 152 142 unsigned long tmp; 153 - unsigned long mask, submask; 154 - unsigned long quot, rest; 155 143 156 144 if (offset >= size) 157 145 return size; ··· 153 157 if (!offset) 154 158 goto aligned; 155 159 156 - tmp = *(p++); 157 - quot = (offset >> 3) << 3; 158 - rest = offset & 0x7; 159 - mask = ~(~0UL << quot); 160 - submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest); 161 - submask <<= quot; 162 - mask += submask; 163 - tmp |= mask; 160 + tmp = __reverse_ulong((unsigned char *)p); 161 + tmp |= ~((~0UL << offset) >> offset); 162 + 164 163 if (size < BITS_PER_LONG) 165 164 goto found_first; 166 - if (~tmp) 165 + if (tmp != ~0UL) 167 166 goto found_middle; 168 167 169 168 size -= BITS_PER_LONG; 170 169 result += BITS_PER_LONG; 170 + p++; 171 171 aligned: 172 172 while (size & ~(BITS_PER_LONG - 1)) { 173 - tmp = *(p++); 174 - if (~tmp) 173 + tmp = __reverse_ulong((unsigned char *)p); 174 + if (tmp != ~0UL) 175 175 goto found_middle; 176 176 result += BITS_PER_LONG; 177 177 size -= BITS_PER_LONG; 178 + p++; 178 179 } 179 180 if (!size) 180 181 return result; 181 - tmp = *p; 182 182 183 + tmp = __reverse_ulong((unsigned char *)p); 183 184 found_first: 184 - tmp |= ~0UL << size; 185 - if (tmp == ~0UL) /* Are any bits zero? */ 185 + tmp |= ~(~0UL << (BITS_PER_LONG - size)); 186 + if (tmp == ~0UL) /* Are any bits zero? */ 186 187 return result + size; /* Nope. 
*/ 187 188 found_middle: 188 189 return result + __reverse_ffz(tmp); 189 - #endif 190 190 } 191 191 192 192 void register_inmem_page(struct inode *inode, struct page *page) ··· 249 257 trace_f2fs_commit_inmem_page(cur->page, INMEM); 250 258 fio.page = cur->page; 251 259 err = do_write_data_page(&fio); 252 - submit_bio = true; 253 260 if (err) { 254 261 unlock_page(cur->page); 255 262 break; 256 263 } 264 + clear_cold_data(cur->page); 265 + submit_bio = true; 257 266 } 258 267 } else { 259 268 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); ··· 289 296 */ 290 297 if (has_not_enough_free_secs(sbi, 0)) { 291 298 mutex_lock(&sbi->gc_mutex); 292 - f2fs_gc(sbi); 299 + f2fs_gc(sbi, false); 293 300 } 294 301 } 295 302 ··· 309 316 /* checkpoint is the only way to shrink partial cached entries */ 310 317 if (!available_free_memory(sbi, NAT_ENTRIES) || 311 318 excess_prefree_segs(sbi) || 312 - !available_free_memory(sbi, INO_ENTRIES)) 319 + !available_free_memory(sbi, INO_ENTRIES) || 320 + jiffies > sbi->cp_expires) 313 321 f2fs_sync_fs(sbi->sb, true); 314 322 } 315 323 ··· 759 765 locate_dirty_segment(sbi, segno); 760 766 761 767 mutex_unlock(&sit_i->sentry_lock); 768 + } 769 + 770 + bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) 771 + { 772 + struct sit_info *sit_i = SIT_I(sbi); 773 + unsigned int segno, offset; 774 + struct seg_entry *se; 775 + bool is_cp = false; 776 + 777 + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) 778 + return true; 779 + 780 + mutex_lock(&sit_i->sentry_lock); 781 + 782 + segno = GET_SEGNO(sbi, blkaddr); 783 + se = get_seg_entry(sbi, segno); 784 + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 785 + 786 + if (f2fs_test_bit(offset, se->ckpt_valid_map)) 787 + is_cp = true; 788 + 789 + mutex_unlock(&sit_i->sentry_lock); 790 + 791 + return is_cp; 762 792 } 763 793 764 794 /* ··· 1310 1292 .encrypted_page = NULL, 1311 1293 }; 1312 1294 1295 + if (unlikely(page->index >= MAIN_BLKADDR(sbi))) 1296 + fio.rw &= ~REQ_META; 1297 + 
1313 1298 set_page_writeback(page); 1314 1299 f2fs_submit_page_mbio(&fio); 1315 1300 } ··· 1390 1369 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); 1391 1370 __add_sum_entry(sbi, type, sum); 1392 1371 1393 - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1372 + if (!recover_curseg) 1373 + update_sit_entry(sbi, new_blkaddr, 1); 1374 + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 1375 + update_sit_entry(sbi, old_blkaddr, -1); 1376 + 1377 + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 1378 + locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); 1379 + 1394 1380 locate_dirty_segment(sbi, old_cursegno); 1395 1381 1396 1382 if (recover_curseg) { ··· 1474 1446 if (is_merged_page(sbi, page, type)) 1475 1447 f2fs_submit_merged_bio(sbi, type, WRITE); 1476 1448 wait_on_page_writeback(page); 1449 + } 1450 + } 1451 + 1452 + void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, 1453 + block_t blkaddr) 1454 + { 1455 + struct page *cpage; 1456 + 1457 + if (blkaddr == NEW_ADDR) 1458 + return; 1459 + 1460 + f2fs_bug_on(sbi, blkaddr == NULL_ADDR); 1461 + 1462 + cpage = find_lock_page(META_MAPPING(sbi), blkaddr); 1463 + if (cpage) { 1464 + f2fs_wait_on_page_writeback(cpage, DATA); 1465 + f2fs_put_page(cpage, 1); 1477 1466 } 1478 1467 } 1479 1468 ··· 1631 1586 1632 1587 if (npages >= 2) 1633 1588 ra_meta_pages(sbi, start_sum_block(sbi), npages, 1634 - META_CP); 1589 + META_CP, true); 1635 1590 1636 1591 /* restore for compacted data summary */ 1637 1592 if (read_compacted_summaries(sbi)) ··· 1641 1596 1642 1597 if (__exist_node_summaries(sbi)) 1643 1598 ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), 1644 - NR_CURSEG_TYPE - type, META_CP); 1599 + NR_CURSEG_TYPE - type, META_CP, true); 1645 1600 1646 1601 for (; type <= CURSEG_COLD_NODE; type++) { 1647 1602 err = read_normal_summaries(sbi, type); ··· 2000 1955 2001 1956 SM_I(sbi)->sit_info = sit_i; 2002 1957 2003 - sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct 
seg_entry)); 1958 + sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * 1959 + sizeof(struct seg_entry), GFP_KERNEL); 2004 1960 if (!sit_i->sentries) 2005 1961 return -ENOMEM; 2006 1962 2007 1963 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 2008 - sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); 1964 + sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); 2009 1965 if (!sit_i->dirty_sentries_bitmap) 2010 1966 return -ENOMEM; 2011 1967 ··· 2028 1982 return -ENOMEM; 2029 1983 2030 1984 if (sbi->segs_per_sec > 1) { 2031 - sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * 2032 - sizeof(struct sec_entry)); 1985 + sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * 1986 + sizeof(struct sec_entry), GFP_KERNEL); 2033 1987 if (!sit_i->sec_entries) 2034 1988 return -ENOMEM; 2035 1989 } ··· 2074 2028 SM_I(sbi)->free_info = free_i; 2075 2029 2076 2030 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 2077 - free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); 2031 + free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL); 2078 2032 if (!free_i->free_segmap) 2079 2033 return -ENOMEM; 2080 2034 2081 2035 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 2082 - free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); 2036 + free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL); 2083 2037 if (!free_i->free_secmap) 2084 2038 return -ENOMEM; 2085 2039 ··· 2128 2082 int nrpages = MAX_BIO_BLOCKS(sbi); 2129 2083 2130 2084 do { 2131 - readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); 2085 + readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true); 2132 2086 2133 2087 start = start_blk * sit_i->sents_per_block; 2134 2088 end = (start_blk + readed) * sit_i->sents_per_block; ··· 2220 2174 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2221 2175 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); 2222 2176 2223 - dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); 2177 + dirty_i->victim_secmap = 
f2fs_kvzalloc(bitmap_size, GFP_KERNEL); 2224 2178 if (!dirty_i->victim_secmap) 2225 2179 return -ENOMEM; 2226 2180 return 0; ··· 2242 2196 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); 2243 2197 2244 2198 for (i = 0; i < NR_DIRTY_TYPE; i++) { 2245 - dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); 2199 + dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); 2246 2200 if (!dirty_i->dirty_segmap[i]) 2247 2201 return -ENOMEM; 2248 2202 } ··· 2347 2301 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2348 2302 2349 2303 mutex_lock(&dirty_i->seglist_lock); 2350 - kfree(dirty_i->dirty_segmap[dirty_type]); 2304 + kvfree(dirty_i->dirty_segmap[dirty_type]); 2351 2305 dirty_i->nr_dirty[dirty_type] = 0; 2352 2306 mutex_unlock(&dirty_i->seglist_lock); 2353 2307 } ··· 2355 2309 static void destroy_victim_secmap(struct f2fs_sb_info *sbi) 2356 2310 { 2357 2311 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 2358 - kfree(dirty_i->victim_secmap); 2312 + kvfree(dirty_i->victim_secmap); 2359 2313 } 2360 2314 2361 2315 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) ··· 2394 2348 if (!free_i) 2395 2349 return; 2396 2350 SM_I(sbi)->free_info = NULL; 2397 - kfree(free_i->free_segmap); 2398 - kfree(free_i->free_secmap); 2351 + kvfree(free_i->free_segmap); 2352 + kvfree(free_i->free_secmap); 2399 2353 kfree(free_i); 2400 2354 } 2401 2355 ··· 2416 2370 } 2417 2371 kfree(sit_i->tmp_map); 2418 2372 2419 - vfree(sit_i->sentries); 2420 - vfree(sit_i->sec_entries); 2421 - kfree(sit_i->dirty_sentries_bitmap); 2373 + kvfree(sit_i->sentries); 2374 + kvfree(sit_i->sec_entries); 2375 + kvfree(sit_i->dirty_sentries_bitmap); 2422 2376 2423 2377 SM_I(sbi)->sit_info = NULL; 2424 2378 kfree(sit_i->sit_bitmap);
+3 -1
fs/f2fs/segment.h
··· 137 137 /* 138 138 * BG_GC means the background cleaning job. 139 139 * FG_GC means the on-demand cleaning job. 140 + * FORCE_FG_GC means on-demand cleaning job in background. 140 141 */ 141 142 enum { 142 143 BG_GC = 0, 143 - FG_GC 144 + FG_GC, 145 + FORCE_FG_GC, 144 146 }; 145 147 146 148 /* for a function parameter to select a victim segment */
+31 -6
fs/f2fs/super.c
··· 213 213 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 214 214 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); 215 215 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); 216 + F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); 216 217 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 217 218 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 219 + F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval); 218 220 219 221 #define ATTR_LIST(name) (&f2fs_attr_##name.attr) 220 222 static struct attribute *f2fs_attrs[] = { ··· 233 231 ATTR_LIST(max_victim_search), 234 232 ATTR_LIST(dir_level), 235 233 ATTR_LIST(ram_thresh), 234 + ATTR_LIST(ra_nid_pages), 235 + ATTR_LIST(cp_interval), 236 236 NULL, 237 237 }; 238 238 ··· 296 292 297 293 if (!name) 298 294 return -ENOMEM; 299 - if (strlen(name) == 2 && !strncmp(name, "on", 2)) 295 + if (strlen(name) == 2 && !strncmp(name, "on", 2)) { 300 296 set_opt(sbi, BG_GC); 301 - else if (strlen(name) == 3 && !strncmp(name, "off", 3)) 297 + clear_opt(sbi, FORCE_FG_GC); 298 + } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { 302 299 clear_opt(sbi, BG_GC); 303 - else { 300 + clear_opt(sbi, FORCE_FG_GC); 301 + } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { 302 + set_opt(sbi, BG_GC); 303 + set_opt(sbi, FORCE_FG_GC); 304 + } else { 304 305 kfree(name); 305 306 return -EINVAL; 306 307 } ··· 640 631 { 641 632 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); 642 633 643 - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) 644 - seq_printf(seq, ",background_gc=%s", "on"); 645 - else 634 + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) { 635 + if (test_opt(sbi, FORCE_FG_GC)) 636 + seq_printf(seq, ",background_gc=%s", "sync"); 637 + else 638 + seq_printf(seq, ",background_gc=%s", "on"); 639 + } else { 646 640 seq_printf(seq, ",background_gc=%s", "off"); 641 + } 647 642 if (test_opt(sbi, 
DISABLE_ROLL_FORWARD)) 648 643 seq_puts(seq, ",disable_roll_forward"); 649 644 if (test_opt(sbi, DISCARD)) ··· 755 742 int err, active_logs; 756 743 bool need_restart_gc = false; 757 744 bool need_stop_gc = false; 745 + bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); 758 746 759 747 sync_filesystem(sb); 760 748 ··· 780 766 */ 781 767 if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) 782 768 goto skip; 769 + 770 + /* disallow enable/disable extent_cache dynamically */ 771 + if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { 772 + err = -EINVAL; 773 + f2fs_msg(sbi->sb, KERN_WARNING, 774 + "switch extent_cache option is not allowed"); 775 + goto restore_opts; 776 + } 783 777 784 778 /* 785 779 * We stop the GC thread if FS is mounted as RO ··· 1018 996 atomic_set(&sbi->nr_pages[i], 0); 1019 997 1020 998 sbi->dir_level = DEF_DIR_LEVEL; 999 + sbi->cp_interval = DEF_CP_INTERVAL; 1021 1000 clear_sbi_flag(sbi, SBI_NEED_FSCK); 1022 1001 1023 1002 INIT_LIST_HEAD(&sbi->s_list); ··· 1354 1331 f2fs_msg(sb, KERN_INFO, "Recover invalid superblock"); 1355 1332 f2fs_commit_super(sbi, true); 1356 1333 } 1334 + 1335 + sbi->cp_expires = round_jiffies_up(jiffies); 1357 1336 1358 1337 return 0; 1359 1338
+64 -5
include/trace/events/f2fs.h
··· 514 514 __entry->ret) 515 515 ); 516 516 517 + TRACE_EVENT(f2fs_background_gc, 518 + 519 + TP_PROTO(struct super_block *sb, long wait_ms, 520 + unsigned int prefree, unsigned int free), 521 + 522 + TP_ARGS(sb, wait_ms, prefree, free), 523 + 524 + TP_STRUCT__entry( 525 + __field(dev_t, dev) 526 + __field(long, wait_ms) 527 + __field(unsigned int, prefree) 528 + __field(unsigned int, free) 529 + ), 530 + 531 + TP_fast_assign( 532 + __entry->dev = sb->s_dev; 533 + __entry->wait_ms = wait_ms; 534 + __entry->prefree = prefree; 535 + __entry->free = free; 536 + ), 537 + 538 + TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u", 539 + show_dev(__entry), 540 + __entry->wait_ms, 541 + __entry->prefree, 542 + __entry->free) 543 + ); 544 + 517 545 TRACE_EVENT(f2fs_get_victim, 518 546 519 547 TP_PROTO(struct super_block *sb, int type, int gc_type, ··· 1028 1000 __entry->for_sync) 1029 1001 ); 1030 1002 1003 + TRACE_EVENT(f2fs_readpages, 1004 + 1005 + TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage), 1006 + 1007 + TP_ARGS(inode, page, nrpage), 1008 + 1009 + TP_STRUCT__entry( 1010 + __field(dev_t, dev) 1011 + __field(ino_t, ino) 1012 + __field(pgoff_t, start) 1013 + __field(unsigned int, nrpage) 1014 + ), 1015 + 1016 + TP_fast_assign( 1017 + __entry->dev = inode->i_sb->s_dev; 1018 + __entry->ino = inode->i_ino; 1019 + __entry->start = page->index; 1020 + __entry->nrpage = nrpage; 1021 + ), 1022 + 1023 + TP_printk("dev = (%d,%d), ino = %lu, start = %lu nrpage = %u", 1024 + show_dev_ino(__entry), 1025 + (unsigned long)__entry->start, 1026 + __entry->nrpage) 1027 + ); 1028 + 1031 1029 TRACE_EVENT(f2fs_write_checkpoint, 1032 1030 1033 1031 TP_PROTO(struct super_block *sb, int reason, char *msg), ··· 1186 1132 __entry->len) 1187 1133 ); 1188 1134 1189 - TRACE_EVENT(f2fs_update_extent_tree, 1135 + TRACE_EVENT(f2fs_update_extent_tree_range, 1190 1136 1191 - TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr), 1137 + TP_PROTO(struct 
inode *inode, unsigned int pgofs, block_t blkaddr, 1138 + unsigned int len), 1192 1139 1193 - TP_ARGS(inode, pgofs, blkaddr), 1140 + TP_ARGS(inode, pgofs, blkaddr, len), 1194 1141 1195 1142 TP_STRUCT__entry( 1196 1143 __field(dev_t, dev) 1197 1144 __field(ino_t, ino) 1198 1145 __field(unsigned int, pgofs) 1199 1146 __field(u32, blk) 1147 + __field(unsigned int, len) 1200 1148 ), 1201 1149 1202 1150 TP_fast_assign( ··· 1206 1150 __entry->ino = inode->i_ino; 1207 1151 __entry->pgofs = pgofs; 1208 1152 __entry->blk = blkaddr; 1153 + __entry->len = len; 1209 1154 ), 1210 1155 1211 - TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u", 1156 + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " 1157 + "blkaddr = %u, len = %u", 1212 1158 show_dev_ino(__entry), 1213 1159 __entry->pgofs, 1214 - __entry->blk) 1160 + __entry->blk, 1161 + __entry->len) 1215 1162 ); 1216 1163 1217 1164 TRACE_EVENT(f2fs_shrink_extent_tree,