Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
"In this round, we've focused on enhancing performance with regards to
block allocation, GC, and discard/in-place-update IO controls. There
are a bunch of clean-ups as well as minor bug fixes.

Enhancements:
- disable heap-based allocation by default
- issue small-sized discard commands by default
- change the policy of data hotness for logging
- distinguish IOs in terms of size and wbc type
- start SSR earlier to avoid foreground GC
- enhance data structures managing discard commands
- enhance in-place update flow
- add some more fault injection routines
- secure one more xattr entry

Bug fixes:
- calculate victim cost for GC correctly
- retain the correct victim segment number for GC
- race condition in nid allocator and initializer
- stale pointer produced by atomic_writes
- fix missing REQ_SYNC for flush commands
- handle missing errors in more corner cases"

* tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (111 commits)
f2fs: fix a mount fail for wrong next_scan_nid
f2fs: enhance scalability of trace macro
f2fs: relocate inode_{,un}lock in F2FS_IOC_SETFLAGS
f2fs: Make flush bios explicitely sync
f2fs: show available_nids in f2fs/status
f2fs: flush dirty nats periodically
f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard
f2fs: allow cpc->reason to indicate more than one reason
f2fs: release cp and dnode lock before IPU
f2fs: shrink size of struct discard_cmd
f2fs: don't hold cmd_lock during waiting discard command
f2fs: nullify fio->encrypted_page for each writes
f2fs: sanity check segment count
f2fs: introduce valid_ipu_blkaddr to clean up
f2fs: lookup extent cache first under IPU scenario
f2fs: reconstruct code to write a data page
f2fs: introduce __wait_discard_cmd
f2fs: introduce __issue_discard_cmd
f2fs: enable small discard by default
f2fs: delay awaking discard thread
...

+1762 -943
+56 -31
fs/f2fs/checkpoint.c
···
 			get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
 		goto skip_write;
 
-	trace_f2fs_writepages(mapping->host, wbc, META);
+	/* if locked failed, cp will flush dirty pages instead */
+	if (!mutex_trylock(&sbi->cp_mutex))
+		goto skip_write;
 
-	/* if mounting is failed, skip writing node pages */
-	mutex_lock(&sbi->cp_mutex);
+	trace_f2fs_writepages(mapping->host, wbc, META);
 	diff = nr_pages_to_write(sbi, META, wbc);
 	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
 	mutex_unlock(&sbi->cp_mutex);
···
 	if (ni.blk_addr != NULL_ADDR) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		f2fs_msg(sbi->sb, KERN_WARNING,
-			"%s: orphan failed (ino=%x), run fsck to fix.",
+			"%s: orphan failed (ino=%x) by kernel, retry mount.",
 			__func__, ino);
 		return -EIO;
 	}
···
 	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
 
 	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
-	if (crc_offset >= blk_size) {
+	if (crc_offset > (blk_size - sizeof(__le32))) {
 		f2fs_msg(sbi->sb, KERN_WARNING,
 			"invalid crc_offset: %zu", crc_offset);
 		return -EINVAL;
···
 		return;
 
 	set_inode_flag(inode, flag);
-	list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
+	if (!f2fs_is_volatile_file(inode))
+		list_add_tail(&F2FS_I(inode)->dirty_list,
+						&sbi->inode_list[type]);
 	stat_inc_dirty_inode(sbi, type);
 }
 
···
 	return 0;
 }
 
+static void __prepare_cp_block(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	nid_t last_nid = nm_i->next_scan_nid;
+
+	next_free_nid(sbi, &last_nid);
+	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
+	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
+	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
+	ckpt->next_free_nid = cpu_to_le32(last_nid);
+}
+
 /*
  * Freeze all the FS-operations for checkpoint.
  */
···
 		err = sync_dirty_inodes(sbi, DIR_INODE);
 		if (err)
 			goto out;
-		goto retry_flush_dents;
-	}
-
-	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
-		f2fs_unlock_all(sbi);
-		err = f2fs_sync_inode_meta(sbi);
-		if (err)
-			goto out;
+		cond_resched();
 		goto retry_flush_dents;
 	}
 
 	/*
 	 * POR: we should ensure that there are no dirty node pages
-	 * until finishing nat/sit flush.
+	 * until finishing nat/sit flush. inode->i_blocks can be updated.
 	 */
+	down_write(&sbi->node_change);
+
+	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
+		up_write(&sbi->node_change);
+		f2fs_unlock_all(sbi);
+		err = f2fs_sync_inode_meta(sbi);
+		if (err)
+			goto out;
+		cond_resched();
+		goto retry_flush_dents;
+	}
+
 retry_flush_nodes:
 	down_write(&sbi->node_write);
···
 		up_write(&sbi->node_write);
 		err = sync_node_pages(sbi, &wbc);
 		if (err) {
+			up_write(&sbi->node_change);
 			f2fs_unlock_all(sbi);
 			goto out;
 		}
+		cond_resched();
 		goto retry_flush_nodes;
 	}
+
+	/*
+	 * sbi->node_change is used only for AIO write_begin path which produces
+	 * dirty node blocks and some checkpoint values by block allocation.
+	 */
+	__prepare_cp_block(sbi);
+	up_write(&sbi->node_change);
 out:
 	blk_finish_plug(&plug);
 	return err;
···
 
 	spin_lock(&sbi->cp_lock);
 
-	if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
+	if ((cpc->reason & CP_UMOUNT) &&
+		le32_to_cpu(ckpt->cp_pack_total_block_count) >
 			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
 		disable_nat_bits(sbi, false);
 
-	if (cpc->reason == CP_UMOUNT)
+	if (cpc->reason & CP_TRIMMED)
+		__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
+
+	if (cpc->reason & CP_UMOUNT)
 		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 	else
 		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 
-	if (cpc->reason == CP_FASTBOOT)
+	if (cpc->reason & CP_FASTBOOT)
 		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
 	else
 		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
···
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
-	nid_t last_nid = nm_i->next_scan_nid;
 	block_t start_blk;
 	unsigned int data_sum_blocks, orphan_blocks;
 	__u32 crc32 = 0;
···
 		return -EIO;
 	}
 
-	next_free_nid(sbi, &last_nid);
-
 	/*
 	 * modify checkpoint
 	 * version number is already updated
 	 */
 	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
-	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
 	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
 	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
 		ckpt->cur_node_segno[i] =
···
 		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
 				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
 	}
-
-	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
-	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
-	ckpt->next_free_nid = cpu_to_le32(last_nid);
 
 	/* 2 cp + n data seg summary + orphan inode blocks */
 	data_sum_blocks = npages_for_summary_flush(sbi, false);
···
 	/* write nat bits */
 	if (enabled_nat_bits(sbi, cpc)) {
 		__u64 cp_ver = cur_cp_version(ckpt);
-		unsigned int i;
 		block_t blk;
 
 		cp_ver |= ((__u64)crc32 << 32);
···
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
-		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
-		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
+		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
+		((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
 		goto out;
 	if (unlikely(f2fs_cp_error(sbi))) {
 		err = -EIO;
···
 	f2fs_flush_merged_bios(sbi);
 
 	/* this is the case of multiple fstrims without any changes */
-	if (cpc->reason == CP_DISCARD) {
+	if (cpc->reason & CP_DISCARD) {
 		if (!exist_trim_candidates(sbi, cpc)) {
 			unblock_operations(sbi);
 			goto out;
···
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
 
-	if (cpc->reason == CP_RECOVERY)
+	if (cpc->reason & CP_RECOVERY)
 		f2fs_msg(sbi->sb, KERN_NOTICE,
 			"checkpoint: version = %llx", ckpt_ver);
 
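
The checkpoint.c hunks above convert every cpc->reason test from equality to a bitwise AND, matching the CP_* reason bits introduced in the f2fs.h hunk further down (commit "f2fs: allow cpc->reason to indicate more than one reason" in the list above). Below is a minimal standalone sketch of the new semantics; the flag values are copied from the diff, while this cp_control is a reduced stand-in for the kernel struct, not its real definition:

#include <stdio.h>

/* flag values copied from the f2fs.h hunk in this series */
#define CP_UMOUNT   0x00000001
#define CP_FASTBOOT 0x00000002
#define CP_SYNC     0x00000004
#define CP_RECOVERY 0x00000008
#define CP_DISCARD  0x00000010
#define CP_TRIMMED  0x00000020

/* stand-in for the kernel's struct cp_control, reduced to the reason field */
struct cp_control { unsigned int reason; };

int main(void)
{
    /* an fstrim-triggered checkpoint can now carry two reasons at once */
    struct cp_control cpc = { .reason = CP_DISCARD | CP_TRIMMED };

    /* old style: equality no longer matches a combined reason */
    printf("equality test sees CP_DISCARD: %d\n", cpc.reason == CP_DISCARD);

    /* new style: bitwise tests see each reason independently */
    printf("mask test sees CP_DISCARD:     %d\n", !!(cpc.reason & CP_DISCARD));
    printf("mask test sees CP_TRIMMED:     %d\n", !!(cpc.reason & CP_TRIMMED));
    return 0;
}

With equality tests, a checkpoint carrying CP_DISCARD | CP_TRIMMED would have matched neither reason, which is exactly the combination the fstrim path now produces.
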
+147 -53
fs/f2fs/data.c
···
 	if (type >= META_FLUSH) {
 		io->fio.type = META_FLUSH;
 		io->fio.op = REQ_OP_WRITE;
-		io->fio.op_flags = REQ_META | REQ_PRIO;
+		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
 		if (!test_opt(sbi, NOBARRIER))
 			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 	}
···
 
 /*
  * Fill the locked page with data located in the block address.
- * Return unlocked page.
+ * A caller needs to unlock the page on failure.
  */
 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 {
···
 	bio_set_op_attrs(bio, fio->op, fio->op_flags);
 
 	__submit_bio(fio->sbi, bio, fio->type);
+
+	if (!is_read_io(fio->op))
+		inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
 	return 0;
 }
 
···
 	return err;
 }
 
+static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+{
+	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
+		if (lock)
+			down_read(&sbi->node_change);
+		else
+			up_read(&sbi->node_change);
+	} else {
+		if (lock)
+			f2fs_lock_op(sbi);
+		else
+			f2fs_unlock_op(sbi);
+	}
+}
+
 /*
  * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
  * f2fs_map_blocks structure.
···
 
 next_dnode:
 	if (create)
-		f2fs_lock_op(sbi);
+		__do_map_lock(sbi, flag, true);
 
 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
···
 	f2fs_put_dnode(&dn);
 
 	if (create) {
-		f2fs_unlock_op(sbi);
+		__do_map_lock(sbi, flag, false);
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 	goto next_dnode;
···
 	f2fs_put_dnode(&dn);
 unlock_out:
 	if (create) {
-		f2fs_unlock_op(sbi);
+		__do_map_lock(sbi, flag, false);
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 out:
···
 
 	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
 
-		prefetchw(&page->flags);
 		if (pages) {
 			page = list_last_entry(pages, struct page, lru);
+
+			prefetchw(&page->flags);
 			list_del(&page->lru);
 			if (add_to_page_cache_lru(page, mapping,
 						page->index,
···
 	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
 }
 
+static int encrypt_one_page(struct f2fs_io_info *fio)
+{
+	struct inode *inode = fio->page->mapping->host;
+	gfp_t gfp_flags = GFP_NOFS;
+
+	if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
+		return 0;
+
+	/* wait for GCed encrypted page writeback */
+	f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr);
+
+retry_encrypt:
+	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
+			PAGE_SIZE, 0, fio->page->index, gfp_flags);
+	if (!IS_ERR(fio->encrypted_page))
+		return 0;
+
+	/* flush pending IOs and wait for a while in the ENOMEM case */
+	if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+		f2fs_flush_merged_bios(fio->sbi);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
+		gfp_flags |= __GFP_NOFAIL;
+		goto retry_encrypt;
+	}
+	return PTR_ERR(fio->encrypted_page);
+}
+
+static inline bool need_inplace_update(struct f2fs_io_info *fio)
+{
+	struct inode *inode = fio->page->mapping->host;
+
+	if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
+		return false;
+	if (is_cold_data(fio->page))
+		return false;
+	if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
+		return false;
+
+	return need_inplace_update_policy(inode, fio);
+}
+
+static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
+{
+	if (fio->old_blkaddr == NEW_ADDR)
+		return false;
+	if (fio->old_blkaddr == NULL_ADDR)
+		return false;
+	return true;
+}
+
 int do_write_data_page(struct f2fs_io_info *fio)
 {
 	struct page *page = fio->page;
 	struct inode *inode = page->mapping->host;
 	struct dnode_of_data dn;
+	struct extent_info ei = {0,0,0};
+	bool ipu_force = false;
 	int err = 0;
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	if (need_inplace_update(fio) &&
+			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
+		fio->old_blkaddr = ei.blk + page->index - ei.fofs;
+
+		if (valid_ipu_blkaddr(fio)) {
+			ipu_force = true;
+			fio->need_lock = false;
+			goto got_it;
+		}
+	}
+
+	if (fio->need_lock)
+		f2fs_lock_op(fio->sbi);
+
 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
 	if (err)
-		return err;
+		goto out;
 
 	fio->old_blkaddr = dn.data_blkaddr;
 
···
 		ClearPageUptodate(page);
 		goto out_writepage;
 	}
-
-	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
-		gfp_t gfp_flags = GFP_NOFS;
-
-		/* wait for GCed encrypted page writeback */
-		f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
-							fio->old_blkaddr);
-retry_encrypt:
-		fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
-						PAGE_SIZE, 0,
-						fio->page->index,
-						gfp_flags);
-		if (IS_ERR(fio->encrypted_page)) {
-			err = PTR_ERR(fio->encrypted_page);
-			if (err == -ENOMEM) {
-				/* flush pending ios and wait for a while */
-				f2fs_flush_merged_bios(F2FS_I_SB(inode));
-				congestion_wait(BLK_RW_ASYNC, HZ/50);
-				gfp_flags |= __GFP_NOFAIL;
-				err = 0;
-				goto retry_encrypt;
-			}
-			goto out_writepage;
-		}
-	}
+got_it:
+	err = encrypt_one_page(fio);
+	if (err)
+		goto out_writepage;
 
 	set_page_writeback(page);
 
···
 	 * If current allocation needs SSR,
 	 * it had better in-place writes for updated data.
 	 */
-	if (unlikely(fio->old_blkaddr != NEW_ADDR &&
-			!is_cold_data(page) &&
-			!IS_ATOMIC_WRITTEN_PAGE(page) &&
-			need_inplace_update(inode))) {
-		rewrite_data_page(fio);
+	if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+		f2fs_put_dnode(&dn);
+		if (fio->need_lock)
+			f2fs_unlock_op(fio->sbi);
+		err = rewrite_data_page(fio);
+		trace_f2fs_do_write_data_page(fio->page, IPU);
 		set_inode_flag(inode, FI_UPDATE_WRITE);
-		trace_f2fs_do_write_data_page(page, IPU);
-	} else {
-		write_data_page(&dn, fio);
-		trace_f2fs_do_write_data_page(page, OPU);
-		set_inode_flag(inode, FI_APPEND_WRITE);
-		if (page->index == 0)
-			set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+		return err;
 	}
+
+	/* LFS mode write path */
+	write_data_page(&dn, fio);
+	trace_f2fs_do_write_data_page(page, OPU);
+	set_inode_flag(inode, FI_APPEND_WRITE);
+	if (page->index == 0)
+		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
 out_writepage:
 	f2fs_put_dnode(&dn);
+out:
+	if (fio->need_lock)
+		f2fs_unlock_op(fio->sbi);
 	return err;
 }
 
···
 		.type = DATA,
 		.op = REQ_OP_WRITE,
 		.op_flags = wbc_to_write_flags(wbc),
+		.old_blkaddr = NULL_ADDR,
 		.page = page,
 		.encrypted_page = NULL,
 		.submitted = false,
+		.need_lock = true,
 	};
 
 	trace_f2fs_writepage(page, DATA);
···
 
 	/* Dentry blocks are controlled by checkpoint */
 	if (S_ISDIR(inode->i_mode)) {
+		fio.need_lock = false;
 		err = do_write_data_page(&fio);
 		goto done;
 	}
···
 		need_balance_fs = true;
 	else if (has_not_enough_free_secs(sbi, 0, 0))
 		goto redirty_out;
+	else
+		set_inode_flag(inode, FI_HOT_DATA);
 
 	err = -EAGAIN;
 	if (f2fs_has_inline_data(inode)) {
···
 		if (!err)
 			goto out;
 	}
-	f2fs_lock_op(sbi);
+
 	if (err == -EAGAIN)
 		err = do_write_data_page(&fio);
 	if (F2FS_I(inode)->last_disk_size < psize)
 		F2FS_I(inode)->last_disk_size = psize;
-	f2fs_unlock_op(sbi);
+
 done:
 	if (err && err != -ENOENT)
 		goto redirty_out;
···
 	if (wbc->for_reclaim) {
 		f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
 						DATA, WRITE);
+		clear_inode_flag(inode, FI_HOT_DATA);
 		remove_dirty_inode(inode);
 		submitted = NULL;
 	}
 
 	unlock_page(page);
-	f2fs_balance_fs(sbi, need_balance_fs);
+	if (!S_ISDIR(inode->i_mode))
+		f2fs_balance_fs(sbi, need_balance_fs);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
···
 	int tag;
 
 	pagevec_init(&pvec, 0);
+
+	if (get_dirty_pages(mapping->host) <=
+			SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
+		set_inode_flag(mapping->host, FI_HOT_DATA);
+	else
+		clear_inode_flag(mapping->host, FI_HOT_DATA);
 
 	if (wbc->range_cyclic) {
 		writeback_index = mapping->writeback_index; /* prev offset */
···
 				last_idx = page->index;
 			}
 
-			if (--wbc->nr_to_write <= 0 &&
-					wbc->sync_mode == WB_SYNC_NONE) {
+			/* give a priority to WB_SYNC threads */
+			if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+					--wbc->nr_to_write <= 0) &&
+					wbc->sync_mode == WB_SYNC_NONE) {
 				done = 1;
 				break;
 			}
···
 
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
+	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		atomic_inc(&sbi->wb_sync_req);
+	else if (atomic_read(&sbi->wb_sync_req))
+		goto skip_write;
+
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc);
 	blk_finish_plug(&plug);
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		atomic_dec(&sbi->wb_sync_req);
 	/*
 	 * if some pages were truncated, we cannot guarantee its mapping->host
 	 * to detect pending bios.
···
 
 	if (f2fs_has_inline_data(inode) ||
 			(pos & PAGE_MASK) >= i_size_read(inode)) {
-		f2fs_lock_op(sbi);
+		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
 		locked = true;
 	}
 restart:
···
 		err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
 		if (err || dn.data_blkaddr == NULL_ADDR) {
 			f2fs_put_dnode(&dn);
-			f2fs_lock_op(sbi);
+			__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
+							true);
 			locked = true;
 			goto restart;
 		}
···
 	f2fs_put_dnode(&dn);
 unlock_out:
 	if (locked)
-		f2fs_unlock_op(sbi);
+		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
 	return err;
 }
···
 
 	/* This is atomic written page, keep Private */
 	if (IS_ATOMIC_WRITTEN_PAGE(page))
-		return;
+		return drop_inmem_page(inode, page);
 
 	set_page_private(page, 0);
 	ClearPagePrivate(page);
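
The rewritten do_write_data_page() above consults the extent cache before taking any lock: if the cached extent yields a usable old block address, it jumps straight to the in-place-update (IPU) path with need_lock cleared. Below is a simplified, self-contained restatement of that ordering; the struct here is a stand-in, the helper names are illustrative, and only the valid_ipu_blkaddr() logic mirrors the diff directly:

#include <stdbool.h>
#include <stdio.h>

#define NULL_ADDR 0U    /* sentinel block addresses, as in f2fs */
#define NEW_ADDR  (~0U)

/* stand-in for struct f2fs_io_info, reduced to the fields the check needs */
struct fio_stub {
    unsigned int old_blkaddr;
    bool need_lock;
};

/* mirrors valid_ipu_blkaddr() from the diff: an in-place write needs a
 * real on-disk block, i.e. neither preallocated (NEW) nor a hole (NULL) */
static bool valid_ipu_blkaddr(struct fio_stub *fio)
{
    return fio->old_blkaddr != NEW_ADDR && fio->old_blkaddr != NULL_ADDR;
}

/* sketch of the fast-path ordering: if the extent cache already knows the
 * old block address and it is usable, skip taking the op lock entirely */
static bool try_ipu_fast_path(struct fio_stub *fio, bool cache_hit,
                              unsigned int cached_blkaddr)
{
    if (!cache_hit)
        return false;
    fio->old_blkaddr = cached_blkaddr;
    if (!valid_ipu_blkaddr(fio))
        return false;
    fio->need_lock = false;    /* corresponds to ipu_force in the real code */
    return true;
}

int main(void)
{
    struct fio_stub fio = { .old_blkaddr = NULL_ADDR, .need_lock = true };

    if (try_ipu_fast_path(&fio, true, 4096))
        printf("IPU fast path: rewrite block %u without f2fs_lock_op()\n",
               fio.old_blkaddr);
    else
        printf("slow path: take lock, walk the dnode\n");
    return 0;
}
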
+44 -18
fs/f2fs/debug.c
···
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
 	si->aw_cnt = atomic_read(&sbi->aw_cnt);
+	si->vw_cnt = atomic_read(&sbi->vw_cnt);
 	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
+	si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
-	if (SM_I(sbi) && SM_I(sbi)->fcc_info)
-		si->nr_flush =
-			atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
-	if (SM_I(sbi) && SM_I(sbi)->dcc_info)
-		si->nr_discard =
-			atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
+	if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
+		si->nr_flushed =
+			atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
+		si->nr_flushing =
+			atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+	}
+	if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
+		si->nr_discarded =
+			atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
+		si->nr_discarding =
+			atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+		si->nr_discard_cmd =
+			atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
+		si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
+	}
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
 	si->rsvd_segs = reserved_segments(sbi);
 	si->overp_segs = overprovision_segments(sbi);
···
 	si->sits = MAIN_SEGS(sbi);
 	si->dirty_sits = SIT_I(sbi)->dirty_sentries;
 	si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+	si->avail_nids = NM_I(sbi)->available_nids;
 	si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
 	si->bg_gc = sbi->bg_gc;
 	si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
···
 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
 		struct curseg_info *curseg = CURSEG_I(sbi, i);
 		si->curseg[i] = curseg->segno;
-		si->cursec[i] = curseg->segno / sbi->segs_per_sec;
-		si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
+		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
+		si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
 	}
 
 	for (i = 0; i < 2; i++) {
···
 
 	bimodal = 0;
 	total_vblocks = 0;
-	blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
+	blks_per_sec = BLKS_PER_SEC(sbi);
 	hblks_per_sec = blks_per_sec / 2;
 	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
-		vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+		vblocks = get_valid_blocks(sbi, segno, true);
 		dist = abs(vblocks - hblks_per_sec);
 		bimodal += dist * dist;
 
···
 	if (si->base_mem)
 		goto get_cache;
 
-	si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
+	/* build stat */
+	si->base_mem = sizeof(struct f2fs_stat_info);
+
+	/* build superblock */
+	si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
 	si->base_mem += 2 * sizeof(struct f2fs_inode_info);
 	si->base_mem += sizeof(*sbi->ckpt);
 	si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
···
 	/* build merge flush thread */
 	if (SM_I(sbi)->fcc_info)
 		si->cache_mem += sizeof(struct flush_cmd_control);
-	if (SM_I(sbi)->dcc_info)
+	if (SM_I(sbi)->dcc_info) {
 		si->cache_mem += sizeof(struct discard_cmd_control);
+		si->cache_mem += sizeof(struct discard_cmd) *
+			atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
+	}
 
 	/* free nids */
 	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
···
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
-		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
+			"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
 			   si->nr_wb_cp_data, si->nr_wb_data,
-			   si->nr_flush, si->nr_discard);
-		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
-			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
+			   si->nr_flushing, si->nr_flushed,
+			   si->nr_discarding, si->nr_discarded,
+			   si->nr_discard_cmd, si->undiscard_blks);
+		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d), "
+			"volatile IO: %4d (Max. %4d)\n",
+			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
+			   si->vw_cnt, si->max_vw_cnt);
 		seq_printf(s, "  - nodes: %4d in %4d\n",
 			   si->ndirty_node, si->node_pages);
 		seq_printf(s, "  - dents: %4d in dirs:%4d (%4d)\n",
···
 			   si->ndirty_imeta);
 		seq_printf(s, "  - NATs: %9d/%9d\n  - SITs: %9d/%9d\n",
 			   si->dirty_nats, si->nats, si->dirty_sits, si->sits);
-		seq_printf(s, "  - free_nids: %9d, alloc_nids: %9d\n",
-			   si->free_nids, si->alloc_nids);
+		seq_printf(s, "  - free_nids: %9d/%9d\n  - alloc_nids: %9d\n",
+			   si->free_nids, si->avail_nids, si->alloc_nids);
 		seq_puts(s, "\nDistribution of User Blocks:");
 		seq_puts(s, " [ valid | invalid | free ]\n");
 		seq_puts(s, "  [");
···
 	atomic_set(&sbi->inplace_count, 0);
 
 	atomic_set(&sbi->aw_cnt, 0);
+	atomic_set(&sbi->vw_cnt, 0);
 	atomic_set(&sbi->max_aw_cnt, 0);
+	atomic_set(&sbi->max_vw_cnt, 0);
 
 	mutex_lock(&f2fs_stat_mutex);
 	list_add_tail(&si->stat_list, &f2fs_stat_list);
+14 -32
fs/f2fs/dir.c
···
 
 	dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
 
-	make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+	make_dentry_ptr_block(NULL, &d, dentry_blk);
 	de = find_target_dentry(fname, namehash, max_slots, &d);
 	if (de)
 		*res_page = dentry_page;
···
 		f2fs_put_page(dentry_page, 0);
 	}
 
-	/* This is to increase the speed of f2fs_create */
-	if (!de && room) {
-		F2FS_I(dir)->task = current;
-		if (F2FS_I(dir)->chash != namehash) {
-			F2FS_I(dir)->chash = namehash;
-			F2FS_I(dir)->clevel = level;
-		}
+	if (!de && room && F2FS_I(dir)->chash != namehash) {
+		F2FS_I(dir)->chash = namehash;
+		F2FS_I(dir)->clevel = level;
 	}
 
 	return de;
···
 			break;
 	}
 out:
+	/* This is to increase the speed of f2fs_create */
+	if (!de)
+		F2FS_I(dir)->task = current;
 	return de;
 }
···
 	set_page_dirty(ipage);
 }
 
-int update_dent_inode(struct inode *inode, struct inode *to,
-					const struct qstr *name)
-{
-	struct page *page;
-
-	if (file_enc_name(to))
-		return 0;
-
-	page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
-	if (IS_ERR(page))
-		return PTR_ERR(page);
-
-	init_dent_inode(name, page);
-	f2fs_put_page(page, 1);
-
-	return 0;
-}
-
 void do_make_empty_dir(struct inode *inode, struct inode *parent,
 					struct f2fs_dentry_ptr *d)
 {
···
 
 	dentry_blk = kmap_atomic(dentry_page);
 
-	make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+	make_dentry_ptr_block(NULL, &d, dentry_blk);
 	do_make_empty_dir(inode, parent, &d);
 
 	kunmap_atomic(dentry_blk);
···
 		set_cold_node(inode, page);
 	}
 
-	if (new_name)
+	if (new_name) {
 		init_dent_inode(new_name, page);
+		if (f2fs_encrypted_inode(dir))
+			file_set_enc_name(inode);
+	}
 
 	/*
 	 * This file should be checkpointed during fsync.
···
 			err = PTR_ERR(page);
 			goto fail;
 		}
-		if (f2fs_encrypted_inode(dir))
-			file_set_enc_name(inode);
 	}
 
-	make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+	make_dentry_ptr_block(NULL, &d, dentry_blk);
 	f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
 
 	set_page_dirty(dentry_page);
···
 
 		dentry_blk = kmap(dentry_page);
 
-		make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
+		make_dentry_ptr_block(inode, &d, dentry_blk);
 
 		err = f2fs_fill_dentries(ctx, &d,
 					n * NR_DENTRY_IN_BLOCK, &fstr);
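
Every dir.c caller above moves from make_dentry_ptr(..., 1) with a magic type argument to the typed make_dentry_ptr_block() helper (its definition, together with make_dentry_ptr_inline(), appears in the f2fs.h hunk below). Here is a small stand-alone illustration of the refactor pattern only; the types, field counts, and helper names are dummies, not the real f2fs layouts:

#include <stdio.h>

/* dummy stand-ins for block-backed vs inline-backed dentry storage;
 * the array sizes are arbitrary, chosen just to make the two differ */
struct blk_layout    { int slots[214]; };
struct inline_layout { int slots[87];  };

struct dentry_ptr { int *dentry; int max; };

/* after the change: one typed initializer per backing layout, so the
 * compiler rejects a mismatched source instead of trusting a flag */
static void make_ptr_block(struct dentry_ptr *d, struct blk_layout *t)
{
    d->dentry = t->slots;
    d->max = sizeof(t->slots) / sizeof(t->slots[0]);
}

static void make_ptr_inline(struct dentry_ptr *d, struct inline_layout *t)
{
    d->dentry = t->slots;
    d->max = sizeof(t->slots) / sizeof(t->slots[0]);
}

int main(void)
{
    struct blk_layout blk;
    struct inline_layout inl;
    struct dentry_ptr d;

    make_ptr_block(&d, &blk);    /* was: make_ptr(&d, &blk, 1) */
    printf("block-backed max: %d\n", d.max);

    make_ptr_inline(&d, &inl);   /* was: make_ptr(&d, &inl, 0) */
    printf("inline-backed max: %d\n", d.max);
    return 0;
}
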
+196 -130
fs/f2fs/extent_cache.c
···
 #include "node.h"
 #include <trace/events/f2fs.h>
 
+static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
+							unsigned int ofs)
+{
+	if (cached_re) {
+		if (cached_re->ofs <= ofs &&
+				cached_re->ofs + cached_re->len > ofs) {
+			return cached_re;
+		}
+	}
+	return NULL;
+}
+
+static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
+							unsigned int ofs)
+{
+	struct rb_node *node = root->rb_node;
+	struct rb_entry *re;
+
+	while (node) {
+		re = rb_entry(node, struct rb_entry, rb_node);
+
+		if (ofs < re->ofs)
+			node = node->rb_left;
+		else if (ofs >= re->ofs + re->len)
+			node = node->rb_right;
+		else
+			return re;
+	}
+	return NULL;
+}
+
+struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+				struct rb_entry *cached_re, unsigned int ofs)
+{
+	struct rb_entry *re;
+
+	re = __lookup_rb_tree_fast(cached_re, ofs);
+	if (!re)
+		return __lookup_rb_tree_slow(root, ofs);
+
+	return re;
+}
+
+struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+				struct rb_root *root, struct rb_node **parent,
+				unsigned int ofs)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_entry *re;
+
+	while (*p) {
+		*parent = *p;
+		re = rb_entry(*parent, struct rb_entry, rb_node);
+
+		if (ofs < re->ofs)
+			p = &(*p)->rb_left;
+		else if (ofs >= re->ofs + re->len)
+			p = &(*p)->rb_right;
+		else
+			f2fs_bug_on(sbi, 1);
+	}
+
+	return p;
+}
+
+/*
+ * lookup rb entry in position of @ofs in rb-tree,
+ * if hit, return the entry, otherwise, return NULL
+ * @prev_ex: extent before ofs
+ * @next_ex: extent after ofs
+ * @insert_p: insert point for new extent at ofs
+ * in order to simpfy the insertion after.
+ * tree must stay unchanged between lookup and insertion.
+ */
+struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+				struct rb_entry *cached_re,
+				unsigned int ofs,
+				struct rb_entry **prev_entry,
+				struct rb_entry **next_entry,
+				struct rb_node ***insert_p,
+				struct rb_node **insert_parent,
+				bool force)
+{
+	struct rb_node **pnode = &root->rb_node;
+	struct rb_node *parent = NULL, *tmp_node;
+	struct rb_entry *re = cached_re;
+
+	*insert_p = NULL;
+	*insert_parent = NULL;
+	*prev_entry = NULL;
+	*next_entry = NULL;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	if (re) {
+		if (re->ofs <= ofs && re->ofs + re->len > ofs)
+			goto lookup_neighbors;
+	}
+
+	while (*pnode) {
+		parent = *pnode;
+		re = rb_entry(*pnode, struct rb_entry, rb_node);
+
+		if (ofs < re->ofs)
+			pnode = &(*pnode)->rb_left;
+		else if (ofs >= re->ofs + re->len)
+			pnode = &(*pnode)->rb_right;
+		else
+			goto lookup_neighbors;
+	}
+
+	*insert_p = pnode;
+	*insert_parent = parent;
+
+	re = rb_entry(parent, struct rb_entry, rb_node);
+	tmp_node = parent;
+	if (parent && ofs > re->ofs)
+		tmp_node = rb_next(parent);
+	*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+
+	tmp_node = parent;
+	if (parent && ofs < re->ofs)
+		tmp_node = rb_prev(parent);
+	*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+	return NULL;
+
+lookup_neighbors:
+	if (ofs == re->ofs || force) {
+		/* lookup prev node for merging backward later */
+		tmp_node = rb_prev(&re->rb_node);
+		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+	}
+	if (ofs == re->ofs + re->len - 1 || force) {
+		/* lookup next node for merging frontward later */
+		tmp_node = rb_next(&re->rb_node);
+		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+	}
+	return re;
+}
+
+bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
+						struct rb_root *root)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+	struct rb_node *cur = rb_first(root), *next;
+	struct rb_entry *cur_re, *next_re;
+
+	if (!cur)
+		return true;
+
+	while (cur) {
+		next = rb_next(cur);
+		if (!next)
+			return true;
+
+		cur_re = rb_entry(cur, struct rb_entry, rb_node);
+		next_re = rb_entry(next, struct rb_entry, rb_node);
+
+		if (cur_re->ofs + cur_re->len > next_re->ofs) {
+			f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
+				"cur(%u, %u) next(%u, %u)",
+				cur_re->ofs, cur_re->len,
+				next_re->ofs, next_re->len);
+			return false;
+		}
+
+		cur = next;
+	}
+#endif
+	return true;
+}
+
 static struct kmem_cache *extent_tree_slab;
 static struct kmem_cache *extent_node_slab;
···
 	F2FS_I(inode)->extent_tree = et;
 
 	return et;
-}
-
-static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
-			struct extent_tree *et, unsigned int fofs)
-{
-	struct rb_node *node = et->root.rb_node;
-	struct extent_node *en = et->cached_en;
-
-	if (en) {
-		struct extent_info *cei = &en->ei;
-
-		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
-			stat_inc_cached_node_hit(sbi);
-			return en;
-		}
-	}
-
-	while (node) {
-		en = rb_entry(node, struct extent_node, rb_node);
-
-		if (fofs < en->ei.fofs) {
-			node = node->rb_left;
-		} else if (fofs >= en->ei.fofs + en->ei.len) {
-			node = node->rb_right;
-		} else {
-			stat_inc_rbtree_node_hit(sbi);
-			return en;
-		}
-	}
-	return NULL;
 }
 
 static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
···
 		goto out;
 	}
 
-	en = __lookup_extent_tree(sbi, et, pgofs);
-	if (en) {
-		*ei = en->ei;
-		spin_lock(&sbi->extent_lock);
-		if (!list_empty(&en->list)) {
-			list_move_tail(&en->list, &sbi->extent_list);
-			et->cached_en = en;
-		}
-		spin_unlock(&sbi->extent_lock);
-		ret = true;
+	en = (struct extent_node *)__lookup_rb_tree(&et->root,
+				(struct rb_entry *)et->cached_en, pgofs);
+	if (!en)
+		goto out;
+
+	if (en == et->cached_en)
+		stat_inc_cached_node_hit(sbi);
+	else
+		stat_inc_rbtree_node_hit(sbi);
+
+	*ei = en->ei;
+	spin_lock(&sbi->extent_lock);
+	if (!list_empty(&en->list)) {
+		list_move_tail(&en->list, &sbi->extent_list);
+		et->cached_en = en;
 	}
+	spin_unlock(&sbi->extent_lock);
+	ret = true;
 out:
 	stat_inc_total_hit(sbi);
 	read_unlock(&et->lock);
 
 	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
 	return ret;
-}
-
-
-/*
- * lookup extent at @fofs, if hit, return the extent
- * if not, return NULL and
- * @prev_ex: extent before fofs
- * @next_ex: extent after fofs
- * @insert_p: insert point for new extent at fofs
- * in order to simpfy the insertion after.
- * tree must stay unchanged between lookup and insertion.
- */
-static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
-				unsigned int fofs,
-				struct extent_node **prev_ex,
-				struct extent_node **next_ex,
-				struct rb_node ***insert_p,
-				struct rb_node **insert_parent)
-{
-	struct rb_node **pnode = &et->root.rb_node;
-	struct rb_node *parent = NULL, *tmp_node;
-	struct extent_node *en = et->cached_en;
-
-	*insert_p = NULL;
-	*insert_parent = NULL;
-	*prev_ex = NULL;
-	*next_ex = NULL;
-
-	if (RB_EMPTY_ROOT(&et->root))
-		return NULL;
-
-	if (en) {
-		struct extent_info *cei = &en->ei;
-
-		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
-			goto lookup_neighbors;
-	}
-
-	while (*pnode) {
-		parent = *pnode;
-		en = rb_entry(*pnode, struct extent_node, rb_node);
-
-		if (fofs < en->ei.fofs)
-			pnode = &(*pnode)->rb_left;
-		else if (fofs >= en->ei.fofs + en->ei.len)
-			pnode = &(*pnode)->rb_right;
-		else
-			goto lookup_neighbors;
-	}
-
-	*insert_p = pnode;
-	*insert_parent = parent;
-
-	en = rb_entry(parent, struct extent_node, rb_node);
-	tmp_node = parent;
-	if (parent && fofs > en->ei.fofs)
-		tmp_node = rb_next(parent);
-	*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
-
-	tmp_node = parent;
-	if (parent && fofs < en->ei.fofs)
-		tmp_node = rb_prev(parent);
-	*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
-	return NULL;
-
-lookup_neighbors:
-	if (fofs == en->ei.fofs) {
-		/* lookup prev node for merging backward later */
-		tmp_node = rb_prev(&en->rb_node);
-		*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
-	}
-	if (fofs == en->ei.fofs + en->ei.len - 1) {
-		/* lookup next node for merging frontward later */
-		tmp_node = rb_next(&en->rb_node);
-		*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
-	}
-	return en;
 }
 
 static struct extent_node *__try_merge_extent_node(struct inode *inode,
···
 		goto do_insert;
 	}
 
-	while (*p) {
-		parent = *p;
-		en = rb_entry(parent, struct extent_node, rb_node);
-
-		if (ei->fofs < en->ei.fofs)
-			p = &(*p)->rb_left;
-		else if (ei->fofs >= en->ei.fofs + en->ei.len)
-			p = &(*p)->rb_right;
-		else
-			f2fs_bug_on(sbi, 1);
-	}
+	p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
 do_insert:
 	en = __attach_extent_node(sbi, et, ei, parent, p);
 	if (!en)
···
 	__drop_largest_extent(inode, fofs, len);
 
 	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
-	en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
-					&insert_p, &insert_parent);
+	en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
+					(struct rb_entry *)et->cached_en, fofs,
+					(struct rb_entry **)&prev_en,
+					(struct rb_entry **)&next_en,
+					&insert_p, &insert_parent, false);
 	if (!en)
 		en = next_en;
 
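
The extent_cache.c changes above hoist the extent tree's interval walk into generic __lookup_rb_tree*() helpers keyed on the (ofs, len) fields of struct rb_entry, so the new discard rb-tree can reuse them. The descent rule is a standard search over non-overlapping intervals; below is a self-contained sketch of the same invariant over a plain binary search tree (the kernel rb-tree API and the f2fs names are deliberately not reproduced here):

#include <stdio.h>
#include <stdlib.h>

/* a node covers the half-open range [ofs, ofs + len) */
struct entry {
    unsigned int ofs, len;
    struct entry *left, *right;
};

/* same descent rule as __lookup_rb_tree_slow() in the diff: go left when
 * the target precedes the range, right when it is at or past the end */
static struct entry *lookup(struct entry *node, unsigned int ofs)
{
    while (node) {
        if (ofs < node->ofs)
            node = node->left;
        else if (ofs >= node->ofs + node->len)
            node = node->right;
        else
            return node;    /* ofs falls inside this interval */
    }
    return NULL;
}

static struct entry *make(unsigned int ofs, unsigned int len,
                          struct entry *l, struct entry *r)
{
    struct entry *e = malloc(sizeof(*e));
    e->ofs = ofs; e->len = len; e->left = l; e->right = r;
    return e;
}

int main(void)
{
    /* three disjoint intervals: [0,4) [10,12) [20,28) */
    struct entry *root = make(10, 2, make(0, 4, NULL, NULL),
                                     make(20, 8, NULL, NULL));
    struct entry *hit = lookup(root, 23);

    if (hit)
        printf("offset 23 -> interval [%u, %u)\n",
               hit->ofs, hit->ofs + hit->len);
    else
        printf("offset 23 -> miss\n");
    return 0;
}

Because the intervals never overlap, the walk needs no backtracking, which is what lets one helper serve both the extent cache and the discard command tree.
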
+206 -116
fs/f2fs/f2fs.h
···
 	FAULT_BLOCK,
 	FAULT_DIR_DEPTH,
 	FAULT_EVICT_INODE,
+	FAULT_TRUNCATE,
 	FAULT_IO,
 	FAULT_CHECKPOINT,
 	FAULT_MAX,
···
 };
 
 extern char *fault_name[FAULT_MAX];
-#define IS_FAULT_SET(fi, type)	(fi->inject_type & (1 << (type)))
+#define IS_FAULT_SET(fi, type)	((fi)->inject_type & (1 << (type)))
 #endif
 
 /*
···
 #define F2FS_MOUNT_ADAPTIVE		0x00020000
 #define F2FS_MOUNT_LFS			0x00040000
 
-#define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
-#define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option)
-#define test_opt(sbi, option)	(sbi->mount_opt.opt & F2FS_MOUNT_##option)
+#define clear_opt(sbi, option)	((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
+#define set_opt(sbi, option)	((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
+#define test_opt(sbi, option)	((sbi)->mount_opt.opt & F2FS_MOUNT_##option)
 
 #define ver_after(a, b)	(typecheck(unsigned long long, a) &&	\
 		typecheck(unsigned long long, b) &&	\
···
 	SIT_BITMAP
 };
 
-enum {
-	CP_UMOUNT,
-	CP_FASTBOOT,
-	CP_SYNC,
-	CP_RECOVERY,
-	CP_DISCARD,
-};
+#define CP_UMOUNT	0x00000001
+#define CP_FASTBOOT	0x00000002
+#define CP_SYNC		0x00000004
+#define CP_RECOVERY	0x00000008
+#define CP_DISCARD	0x00000010
+#define CP_TRIMMED	0x00000020
 
 #define DEF_BATCHED_TRIM_SECTIONS	2048
 #define BATCHED_TRIM_SEGMENTS(sbi)	\
-		(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
+		(GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections))
 #define BATCHED_TRIM_BLOCKS(sbi)	\
 		(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
-#define MAX_DISCARD_BLOCKS(sbi)	\
-		((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec)
-#define DISCARD_ISSUE_RATE	8
+#define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
+#define DISCARD_ISSUE_RATE		8
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
···
 	struct inode *inode;	/* vfs inode pointer */
 };
 
-/* for the list of blockaddresses to be discarded */
+/* for the bitmap indicate blocks to be discarded */
 struct discard_entry {
 	struct list_head list;	/* list head */
-	block_t blkaddr;	/* block address to be discarded */
-	int len;		/* # of consecutive blocks of the discard */
+	block_t start_blkaddr;	/* start blockaddr of current segment */
+	unsigned char discard_map[SIT_VBLOCK_MAP_SIZE];	/* segment discard bitmap */
 };
+
+/* max discard pend list number */
+#define MAX_PLIST_NUM		512
+#define plist_idx(blk_num)	((blk_num) >= MAX_PLIST_NUM ?	\
+					(MAX_PLIST_NUM - 1) : (blk_num - 1))
 
 enum {
 	D_PREP,
···
 	D_DONE,
 };
 
-struct discard_cmd {
-	struct list_head list;	/* command list */
-	struct completion wait;	/* compleation */
+struct discard_info {
 	block_t lstart;		/* logical start address */
 	block_t len;		/* length */
-	struct bio *bio;	/* bio */
-	int state;		/* state */
+	block_t start;		/* actual start address in dev */
+};
+
+struct discard_cmd {
+	struct rb_node rb_node;	/* rb node located in rb-tree */
+	union {
+		struct {
+			block_t lstart;	/* logical start address */
+			block_t len;	/* length */
+			block_t start;	/* actual start address in dev */
+		};
+		struct discard_info di;	/* discard info */
+
+	};
+	struct list_head list;	/* command list */
+	struct completion wait;	/* compleation */
+	struct block_device *bdev;	/* bdev */
+	unsigned short ref;	/* reference count */
+	unsigned char state;	/* state */
+	int error;		/* bio error */
 };
 
 struct discard_cmd_control {
 	struct task_struct *f2fs_issue_discard;	/* discard thread */
-	struct list_head discard_entry_list;	/* 4KB discard entry list */
-	int nr_discards;			/* # of discards in the list */
-	struct list_head discard_cmd_list;	/* discard cmd list */
+	struct list_head entry_list;		/* 4KB discard entry list */
+	struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
+	struct list_head wait_list;		/* store on-flushing entries */
 	wait_queue_head_t discard_wait_queue;	/* waiting queue for wake-up */
 	struct mutex cmd_lock;
-	int max_discards;			/* max. discards to be issued */
-	atomic_t submit_discard;		/* # of issued discard */
+	unsigned int nr_discards;		/* # of discards in the list */
+	unsigned int max_discards;		/* max. discards to be issued */
+	unsigned int undiscard_blks;		/* # of undiscard blocks */
+	atomic_t issued_discard;		/* # of issued discard */
+	atomic_t issing_discard;		/* # of issing discard */
+	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
+	struct rb_root root;			/* root of discard rb-tree */
 };
 
 /* for the list of fsync inodes, used only during recovery */
···
 	block_t last_dentry;	/* block address locating the last dentry */
 };
 
-#define nats_in_cursum(jnl)		(le16_to_cpu(jnl->n_nats))
-#define sits_in_cursum(jnl)		(le16_to_cpu(jnl->n_sits))
+#define nats_in_cursum(jnl)		(le16_to_cpu((jnl)->n_nats))
+#define sits_in_cursum(jnl)		(le16_to_cpu((jnl)->n_sits))
 
-#define nat_in_journal(jnl, i)		(jnl->nat_j.entries[i].ne)
-#define nid_in_journal(jnl, i)		(jnl->nat_j.entries[i].nid)
-#define sit_in_journal(jnl, i)		(jnl->sit_j.entries[i].se)
-#define segno_in_journal(jnl, i)	(jnl->sit_j.entries[i].segno)
+#define nat_in_journal(jnl, i)		((jnl)->nat_j.entries[i].ne)
+#define nid_in_journal(jnl, i)		((jnl)->nat_j.entries[i].nid)
+#define sit_in_journal(jnl, i)		((jnl)->sit_j.entries[i].se)
+#define segno_in_journal(jnl, i)	((jnl)->sit_j.entries[i].segno)
 
 #define MAX_NAT_JENTRIES(jnl)	(NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl))
 #define MAX_SIT_JENTRIES(jnl)	(SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl))
···
 #define F2FS_IOC_START_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 3)
 #define F2FS_IOC_RELEASE_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 4)
 #define F2FS_IOC_ABORT_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 5)
-#define F2FS_IOC_GARBAGE_COLLECT	_IO(F2FS_IOCTL_MAGIC, 6)
+#define F2FS_IOC_GARBAGE_COLLECT	_IOW(F2FS_IOCTL_MAGIC, 6, __u32)
 #define F2FS_IOC_WRITE_CHECKPOINT	_IO(F2FS_IOCTL_MAGIC, 7)
-#define F2FS_IOC_DEFRAGMENT		_IO(F2FS_IOCTL_MAGIC, 8)
+#define F2FS_IOC_DEFRAGMENT		_IOWR(F2FS_IOCTL_MAGIC, 8,	\
+						struct f2fs_defragment)
 #define F2FS_IOC_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
 						struct f2fs_move_range)
+#define F2FS_IOC_FLUSH_DEVICE		_IOW(F2FS_IOCTL_MAGIC, 10,	\
+						struct f2fs_flush_device)
 
 #define F2FS_IOC_SET_ENCRYPTION_POLICY	FS_IOC_SET_ENCRYPTION_POLICY
 #define F2FS_IOC_GET_ENCRYPTION_POLICY	FS_IOC_GET_ENCRYPTION_POLICY
···
 	u64 len;		/* size to move */
 };
 
+struct f2fs_flush_device {
+	u32 dev_num;		/* device number to flush */
+	u32 segments;		/* # of segments to flush */
+};
+
 /*
  * For INODE and NODE manager
  */
···
 	int max;
 };
 
-static inline void make_dentry_ptr(struct inode *inode,
-		struct f2fs_dentry_ptr *d, void *src, int type)
+static inline void make_dentry_ptr_block(struct inode *inode,
+		struct f2fs_dentry_ptr *d, struct f2fs_dentry_block *t)
 {
 	d->inode = inode;
+	d->max = NR_DENTRY_IN_BLOCK;
+	d->bitmap = &t->dentry_bitmap;
+	d->dentry = t->dentry;
+	d->filename = t->filename;
+}
 
-	if (type == 1) {
-		struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
-
-		d->max = NR_DENTRY_IN_BLOCK;
-		d->bitmap = &t->dentry_bitmap;
-		d->dentry = t->dentry;
-		d->filename = t->filename;
-	} else {
-		struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
-
-		d->max = NR_INLINE_DENTRY;
-		d->bitmap = &t->dentry_bitmap;
-		d->dentry = t->dentry;
-		d->filename = t->filename;
-	}
+static inline void make_dentry_ptr_inline(struct inode *inode,
+		struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t)
+{
+	d->inode = inode;
+	d->max = NR_INLINE_DENTRY;
+	d->bitmap = &t->dentry_bitmap;
+	d->dentry = t->dentry;
+	d->filename = t->filename;
 }
 
 /*
···
 /* number of extent info in extent cache we try to shrink */
 #define EXTENT_CACHE_SHRINK_NUMBER	128
 
+struct rb_entry {
+	struct rb_node rb_node;		/* rb node located in rb-tree */
+	unsigned int ofs;		/* start offset of the entry */
+	unsigned int len;		/* length of the entry */
+};
+
 struct extent_info {
 	unsigned int fofs;		/* start offset in a file */
-	u32 blk;			/* start block address of the extent */
 	unsigned int len;		/* length of the extent */
+	u32 blk;			/* start block address of the extent */
 };
 
 struct extent_node {
-	struct rb_node rb_node;		/* rb node located in rb-tree */
+	struct rb_node rb_node;
+	union {
+		struct {
+			unsigned int fofs;
+			unsigned int len;
+			u32 blk;
+		};
+		struct extent_info ei;	/* extent info */
+
+	};
 	struct list_head list;		/* node in global extent list of sbi */
-	struct extent_info ei;		/* extent info */
 	struct extent_tree *et;		/* extent tree pointer */
 };
···
 	ei->len = len;
 }
 
+static inline bool __is_discard_mergeable(struct discard_info *back,
+						struct discard_info *front)
+{
+	return back->lstart + back->len == front->lstart;
+}
+
+static inline bool __is_discard_back_mergeable(struct discard_info *cur,
+						struct discard_info *back)
+{
+	return __is_discard_mergeable(back, cur);
+}
+
+static inline bool __is_discard_front_mergeable(struct discard_info *cur,
+						struct discard_info *front)
+{
+	return __is_discard_mergeable(cur, front);
+}
+
 static inline bool __is_extent_mergeable(struct extent_info *back,
 						struct extent_info *front)
 {
···
 	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
 	unsigned char *nat_block_bitmap;
 	unsigned short *free_nid_count;	/* free nid count of NAT block */
-	spinlock_t free_nid_lock;	/* protect updating of nid count */
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
···
 struct flush_cmd_control {
 	struct task_struct *f2fs_issue_flush;	/* flush thread */
 	wait_queue_head_t flush_wait_queue;	/* waiting queue for wake-up */
-	atomic_t submit_flush;			/* # of issued flushes */
+	atomic_t issued_flush;			/* # of issued flushes */
+	atomic_t issing_flush;			/* # of issing flushes */
 	struct llist_head issue_list;		/* list for command issue */
 	struct llist_node *dispatch_list;	/* list for command dispatch */
 };
···
 	unsigned int ipu_policy;	/* in-place-update policy */
 	unsigned int min_ipu_util;	/* in-place-update threshold */
 	unsigned int min_fsync_blocks;	/* threshold for fsync */
+	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
 
 	/* for flush command control */
 	struct flush_cmd_control *fcc_info;
···
 	META_FLUSH,
 	INMEM,		/* the below types are used by tracepoints only. */
 	INMEM_DROP,
+	INMEM_INVALIDATE,
 	INMEM_REVOKE,
 	IPU,
 	OPU,
···
 	struct page *page;	/* page to be written */
 	struct page *encrypted_page;	/* encrypted page */
 	bool submitted;		/* indicate IO submission */
+	bool need_lock;		/* indicate we need to lock cp_rwsem */
 };
 
-#define is_read_io(rw)	(rw == READ)
+#define is_read_io(rw)	((rw) == READ)
 struct f2fs_bio_info {
 	struct f2fs_sb_info *sbi;	/* f2fs superblock */
 	struct bio *bio;		/* bios to merge */
···
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
 	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
 	struct rw_semaphore node_write;		/* locking node writes */
+	struct rw_semaphore node_change;	/* locking node change */
 	wait_queue_head_t cp_wait;
 	unsigned long last_time[MAX_TIME];	/* to store time in jiffies */
 	long interval_time[MAX_TIME];		/* to store thresholds */
···
 	/* # of allocated blocks */
 	struct percpu_counter alloc_valid_block_count;
 
+	/* writeback control */
+	atomic_t wb_sync_req;			/* count # of WB_SYNC threads */
+
 	/* valid inode count */
 	struct percpu_counter total_valid_inode_count;
 
···
 	atomic_t inline_inode;			/* # of inline_data inodes */
 	atomic_t inline_dir;			/* # of inline_dentry inodes */
 	atomic_t aw_cnt;			/* # of atomic writes */
+	atomic_t vw_cnt;			/* # of volatile writes */
 	atomic_t max_aw_cnt;			/* max # of atomic writes */
+	atomic_t max_vw_cnt;			/* max # of volatile writes */
 	int bg_gc;				/* background gc calls */
 	unsigned int ndirty_inode[NR_INODE_TYPE];	/* # of dirty inodes */
 #endif
-	unsigned int last_victim[2];		/* last victim segment # */
 	spinlock_t stat_lock;			/* lock for stat operations */
 
 	/* For sysfs suppport */
···
  * and the return value is in kbytes. s is of struct f2fs_sb_info.
  */
 #define BD_PART_WRITTEN(s)	\
-(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) -	\
-		s->sectors_written_start) >> 1)
+(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) -	\
+		(s)->sectors_written_start) >> 1)
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
 {
···
 {
 	bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
 
-	return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
+	return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set;
 }
 
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
···
 
 static inline bool __remain_node_summaries(int reason)
 {
-	return (reason == CP_UMOUNT || reason == CP_FASTBOOT);
+	return (reason & (CP_UMOUNT | CP_FASTBOOT));
 }
 
 static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
···
 	FI_DO_DEFRAG,		/* indicate defragment is running */
 	FI_DIRTY_FILE,		/* indicate regular/symlink has dirty pages */
 	FI_NO_PREALLOC,		/* indicate skipped preallocated blocks */
+	FI_HOT_DATA,		/* indicate file is hot */
 };
 
 static inline void __mark_inode_dirty_flag(struct inode *inode,
···
 	return is_inode_flag_set(inode, FI_INLINE_DATA);
 }
 
-static inline void f2fs_clear_inline_inode(struct inode *inode)
-{
-	clear_inode_flag(inode, FI_INLINE_DATA);
-	clear_inode_flag(inode, FI_DATA_EXIST);
-}
-
 static inline int f2fs_exist_data(struct inode *inode)
 {
 	return is_inode_flag_set(inode, FI_DATA_EXIST);
···
 	((is_inode_flag_set(i, FI_ACL_MODE)) ?	\
 	 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
 
-/* get offset of first page in next direct node */
-#define PGOFS_OF_NEXT_DNODE(pgofs, inode)	\
-	((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) :	\
-	(pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) /	\
-	ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
-
 /*
  * file.c
  */
···
 			struct page **page);
 void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
 			struct page *page, struct inode *inode);
-int update_dent_inode(struct inode *inode, struct inode *to,
-			const struct qstr *name);
 void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
 			const struct qstr *name, f2fs_hash_t name_hash,
 			unsigned int bit_pos);
···
  */
 void register_inmem_page(struct inode *inode, struct page *page);
 void drop_inmem_pages(struct inode *inode);
+void drop_inmem_page(struct inode *inode, struct page *page);
 int commit_inmem_pages(struct inode *inode);
 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
···
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
-void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr);
+void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void release_discard_addrs(struct f2fs_sb_info *sbi);
 int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
···
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
 void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-void rewrite_data_page(struct f2fs_io_info *fio);
+int rewrite_data_page(struct f2fs_io_info *fio);
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 			block_t old_blkaddr, block_t new_blkaddr,
 			bool recover_curseg, bool recover_newaddr);
···
 int start_gc_thread(struct f2fs_sb_info *sbi);
 void stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
+			unsigned int segno);
 void build_gc_manager(struct f2fs_sb_info *sbi);
 
 /*
···
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
 	int inmem_pages;
 	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
-	int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
+	int nats, dirty_nats, sits, dirty_sits;
+	int free_nids, avail_nids, alloc_nids;
 	int total_count, utilization;
-	int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
+	int bg_gc, nr_wb_cp_data, nr_wb_data;
+	int nr_flushing, nr_flushed, nr_discarding, nr_discarded;
+	int nr_discard_cmd;
+	unsigned int undiscard_blks;
 	int inline_xattr, inline_inode, inline_dir, append, update, orphans;
-	int aw_cnt, max_aw_cnt;
+	int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt;
 	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
 	unsigned int bimodal, avg_vblocks;
 	int util_free, util_valid, util_invalid;
···
 		if (cur > max)	\
 			atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur);	\
 	} while (0)
+#define stat_inc_volatile_write(inode)	\
+		(atomic_inc(&F2FS_I_SB(inode)->vw_cnt))
+#define stat_dec_volatile_write(inode)	\
+		(atomic_dec(&F2FS_I_SB(inode)->vw_cnt))
+#define stat_update_max_volatile_write(inode)	\
+	do {	\
+		int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt);	\
+		int max = atomic_read(&F2FS_I_SB(inode)->max_vw_cnt);	\
+		if (cur > max)	\
+			atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur);	\
+	} while (0)
 #define stat_inc_seg_count(sbi, type, gc_type)	\
 	do {	\
 		struct f2fs_stat_info *si = F2FS_STAT(sbi);	\
-		(si)->tot_segs++;	\
-		if (type == SUM_TYPE_DATA) {	\
+		si->tot_segs++;	\
+		if ((type) == SUM_TYPE_DATA) {	\
 			si->data_segs++;	\
 			si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0;	\
 		} else {	\
···
 	} while (0)
 
 #define stat_inc_tot_blk_count(si, blks)	\
-	(si->tot_blks += (blks))
+	((si)->tot_blks += (blks))
 
 #define stat_inc_data_blk_count(sbi, blks, gc_type)	\
 	do {	\
 		struct f2fs_stat_info *si = F2FS_STAT(sbi);	\
 		stat_inc_tot_blk_count(si, blks);	\
 		si->data_blks += (blks);	\
-		si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0;	\
+		si->bg_data_blks += ((gc_type) == BG_GC) ? (blks) : 0;	\
 	} while (0)
 
 #define stat_inc_node_blk_count(sbi, blks, gc_type)	\
···
 		struct f2fs_stat_info *si = F2FS_STAT(sbi);	\
 		stat_inc_tot_blk_count(si, blks);	\
 		si->node_blks += (blks);	\
-		si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0;	\
+		si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0;	\
 	} while (0)
 
 int f2fs_build_stats(struct f2fs_sb_info *sbi);
···
 int __init f2fs_create_root_stats(void);
 void f2fs_destroy_root_stats(void);
 #else
-#define stat_inc_cp_count(si)
-#define stat_inc_bg_cp_count(si)
-#define stat_inc_call_count(si)
-#define stat_inc_bggc_count(si)
-#define stat_inc_dirty_inode(sbi, type)
-#define stat_dec_dirty_inode(sbi, type)
-#define stat_inc_total_hit(sb)
-#define stat_inc_rbtree_node_hit(sb)
-#define stat_inc_largest_node_hit(sbi)
-#define stat_inc_cached_node_hit(sbi)
-#define stat_inc_inline_xattr(inode)
-#define stat_dec_inline_xattr(inode)
-#define stat_inc_inline_inode(inode)
-#define stat_dec_inline_inode(inode)
-#define stat_inc_inline_dir(inode)
-#define stat_dec_inline_dir(inode)
-#define stat_inc_atomic_write(inode)
-#define stat_dec_atomic_write(inode)
-#define stat_update_max_atomic_write(inode)
-#define stat_inc_seg_type(sbi, curseg)
-#define stat_inc_block_count(sbi, curseg)
-#define stat_inc_inplace_blocks(sbi)
-#define stat_inc_seg_count(sbi, type, gc_type)
-#define stat_inc_tot_blk_count(si, blks)
-#define stat_inc_data_blk_count(sbi, blks, gc_type)
-#define stat_inc_node_blk_count(sbi, blks, gc_type)
+#define stat_inc_cp_count(si)				do { } while (0)
+#define stat_inc_bg_cp_count(si)			do { } while (0)
+#define stat_inc_call_count(si)				do { } while (0)
+#define stat_inc_bggc_count(si)				do { } while (0)
+#define stat_inc_dirty_inode(sbi, type)			do { } while (0)
+#define stat_dec_dirty_inode(sbi, type)			do { } while (0)
+#define stat_inc_total_hit(sb)				do { } while (0)
+#define stat_inc_rbtree_node_hit(sb)			do { } while (0)
+#define stat_inc_largest_node_hit(sbi)			do { } while (0)
+#define stat_inc_cached_node_hit(sbi)			do { } while (0)
+#define stat_inc_inline_xattr(inode)			do { } while (0)
+#define stat_dec_inline_xattr(inode)			do { } while (0)
+#define stat_inc_inline_inode(inode)			do { } while (0)
+#define stat_dec_inline_inode(inode)			do { } while (0)
+#define stat_inc_inline_dir(inode)			do { } while (0)
+#define stat_dec_inline_dir(inode)			do { } while (0)
+#define stat_inc_atomic_write(inode)			do { } while (0)
+#define stat_dec_atomic_write(inode)			do { } while (0)
+#define stat_update_max_atomic_write(inode)		do { } while (0)
+#define stat_inc_volatile_write(inode)			do { } while (0)
+#define stat_dec_volatile_write(inode)			do { } while (0)
+#define stat_update_max_volatile_write(inode)		do { } while (0)
+#define stat_inc_seg_type(sbi, curseg)			do { } while (0)
+#define stat_inc_block_count(sbi, curseg)		do { } while (0)
+#define stat_inc_inplace_blocks(sbi)			do { } while (0)
+#define stat_inc_seg_count(sbi, type, gc_type)		do { } while (0)
+#define stat_inc_tot_blk_count(si, blks)		do { } while (0)
+#define stat_inc_data_blk_count(sbi, blks, gc_type)	do { } while (0)
+#define stat_inc_node_blk_count(sbi, blks, gc_type)	do { } while (0)
 
 static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
 static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
···
 bool f2fs_may_inline_data(struct inode *inode);
 bool f2fs_may_inline_dentry(struct inode *inode);
 void read_inline_data(struct
page *page, struct page *ipage); 2591 - bool truncate_inline_inode(struct page *ipage, u64 from); 2513 + void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); 2592 2514 int f2fs_read_inline_data(struct inode *inode, struct page *page); 2593 2515 int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); 2594 2516 int f2fs_convert_inline_inode(struct inode *inode); ··· 2623 2545 /* 2624 2546 * extent_cache.c 2625 2547 */ 2548 + struct rb_entry *__lookup_rb_tree(struct rb_root *root, 2549 + struct rb_entry *cached_re, unsigned int ofs); 2550 + struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, 2551 + struct rb_root *root, struct rb_node **parent, 2552 + unsigned int ofs); 2553 + struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, 2554 + struct rb_entry *cached_re, unsigned int ofs, 2555 + struct rb_entry **prev_entry, struct rb_entry **next_entry, 2556 + struct rb_node ***insert_p, struct rb_node **insert_parent, 2557 + bool force); 2558 + bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, 2559 + struct rb_root *root); 2626 2560 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); 2627 2561 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); 2628 2562 void f2fs_drop_extent_tree(struct inode *inode);
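The f2fs.h changes above are largely macro hygiene: is_read_io() and the stat helpers gain parentheses around their parameters, and the CONFIG_F2FS_STAT_FS=n stubs become do { } while (0) so a disabled stat call still parses as exactly one statement. A minimal userspace sketch of the two hazards this closes (macro names here are invented for the example, not kernel code):

#include <stdio.h>

/* unparenthesized parameter: operator precedence leaks into the expansion */
#define IS_READ_BAD(rw)		(rw == 0)
/* parenthesized, as in the patch: ((rw) == READ) */
#define IS_READ_OK(rw)		((rw) == 0)

/* two bare statements: only the first binds to an enclosing 'if' */
#define COUNT2_BAD(a, b)	(a)++; (b)++
/* do { } while (0) makes the pair behave as one statement */
#define COUNT2_OK(a, b)		do { (a)++; (b)++; } while (0)

int main(void)
{
	int x = 0, y = 0;

	/* 1 & 2 is 0, but the bad form parses as 1 & (2 == 0) */
	printf("bad=%d ok=%d\n", IS_READ_BAD(1 & 2), IS_READ_OK(1 & 2));

	if (0)
		COUNT2_BAD(x, y);	/* y++ escapes the if entirely */
	printf("x=%d y=%d\n", x, y);	/* x=0 y=1 */

	x = y = 0;
	if (0)
		COUNT2_OK(x, y);	/* a single statement: nothing runs */
	printf("x=%d y=%d\n", x, y);	/* x=0 y=0 */
	return 0;
}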
+127 -48
fs/f2fs/file.c
··· 116 116 if (!dentry) 117 117 return 0; 118 118 119 - if (update_dent_inode(inode, inode, &dentry->d_name)) { 120 - dput(dentry); 121 - return 0; 122 - } 123 - 124 119 *pino = parent_ino(dentry); 125 120 dput(dentry); 126 121 return 1; ··· 523 528 524 529 page = get_lock_data_page(inode, index, true); 525 530 if (IS_ERR(page)) 526 - return 0; 531 + return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); 527 532 truncate_out: 528 533 f2fs_wait_on_page_writeback(page, DATA, true); 529 534 zero_user(page, offset, PAGE_SIZE - offset); ··· 561 566 } 562 567 563 568 if (f2fs_has_inline_data(inode)) { 564 - truncate_inline_inode(ipage, from); 565 - if (from == 0) 566 - clear_inode_flag(inode, FI_DATA_EXIST); 569 + truncate_inline_inode(inode, ipage, from); 567 570 f2fs_put_page(ipage, 1); 568 571 truncate_page = true; 569 572 goto out; ··· 610 617 611 618 trace_f2fs_truncate(inode); 612 619 620 + #ifdef CONFIG_F2FS_FAULT_INJECTION 621 + if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { 622 + f2fs_show_injection_info(FAULT_TRUNCATE); 623 + return -EIO; 624 + } 625 + #endif 613 626 /* we should check inline_data size */ 614 627 if (!f2fs_may_inline_data(inode)) { 615 628 err = f2fs_convert_inline_inode(inode); ··· 1187 1188 if (ret) 1188 1189 return ret; 1189 1190 1190 - if (offset + len > new_size) 1191 - new_size = offset + len; 1192 1191 new_size = max_t(loff_t, new_size, offset + len); 1193 1192 } else { 1194 1193 if (off_start) { ··· 1254 1257 int ret = 0; 1255 1258 1256 1259 new_size = i_size_read(inode) + len; 1257 - if (new_size > inode->i_sb->s_maxbytes) 1258 - return -EFBIG; 1260 + ret = inode_newsize_ok(inode, new_size); 1261 + if (ret) 1262 + return ret; 1259 1263 1260 1264 if (offset >= i_size_read(inode)) 1261 1265 return -EINVAL; ··· 1426 1428 drop_inmem_pages(inode); 1427 1429 if (f2fs_is_volatile_file(inode)) { 1428 1430 clear_inode_flag(inode, FI_VOLATILE_FILE); 1431 + stat_dec_volatile_write(inode); 1429 1432 set_inode_flag(inode, FI_DROP_CACHE); 1430 1433 filemap_fdatawrite(inode->i_mapping); 1431 1434 clear_inode_flag(inode, FI_DROP_CACHE); ··· 1473 1474 if (ret) 1474 1475 return ret; 1475 1476 1476 - flags = f2fs_mask_flags(inode->i_mode, flags); 1477 - 1478 1477 inode_lock(inode); 1478 + 1479 + flags = f2fs_mask_flags(inode->i_mode, flags); 1479 1480 1480 1481 oldflags = fi->i_flags; 1481 1482 ··· 1490 1491 flags = flags & FS_FL_USER_MODIFIABLE; 1491 1492 flags |= oldflags & ~FS_FL_USER_MODIFIABLE; 1492 1493 fi->i_flags = flags; 1493 - inode_unlock(inode); 1494 1494 1495 1495 inode->i_ctime = current_time(inode); 1496 1496 f2fs_set_inode_flags(inode); 1497 + 1498 + inode_unlock(inode); 1497 1499 out: 1498 1500 mnt_drop_write_file(filp); 1499 1501 return ret; ··· 1515 1515 if (!inode_owner_or_capable(inode)) 1516 1516 return -EACCES; 1517 1517 1518 + if (!S_ISREG(inode->i_mode)) 1519 + return -EINVAL; 1520 + 1518 1521 ret = mnt_want_write_file(filp); 1519 1522 if (ret) 1520 1523 return ret; ··· 1532 1529 goto out; 1533 1530 1534 1531 set_inode_flag(inode, FI_ATOMIC_FILE); 1532 + set_inode_flag(inode, FI_HOT_DATA); 1535 1533 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1536 1534 1537 1535 if (!get_dirty_pages(inode)) 1538 - goto out; 1536 + goto inc_stat; 1539 1537 1540 1538 f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, 1541 1539 "Unexpected flush for atomic writes: ino=%lu, npages=%u", 1542 1540 inode->i_ino, get_dirty_pages(inode)); 1543 1541 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 1544 - if (ret) 1542 + if (ret) { 1545 1543 
clear_inode_flag(inode, FI_ATOMIC_FILE); 1546 - out: 1544 + goto out; 1545 + } 1546 + 1547 + inc_stat: 1547 1548 stat_inc_atomic_write(inode); 1548 1549 stat_update_max_atomic_write(inode); 1550 + out: 1549 1551 inode_unlock(inode); 1550 1552 mnt_drop_write_file(filp); 1551 1553 return ret; ··· 1600 1592 if (!inode_owner_or_capable(inode)) 1601 1593 return -EACCES; 1602 1594 1595 + if (!S_ISREG(inode->i_mode)) 1596 + return -EINVAL; 1597 + 1603 1598 ret = mnt_want_write_file(filp); 1604 1599 if (ret) 1605 1600 return ret; ··· 1615 1604 ret = f2fs_convert_inline_inode(inode); 1616 1605 if (ret) 1617 1606 goto out; 1607 + 1608 + stat_inc_volatile_write(inode); 1609 + stat_update_max_volatile_write(inode); 1618 1610 1619 1611 set_inode_flag(inode, FI_VOLATILE_FILE); 1620 1612 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); ··· 1674 1660 drop_inmem_pages(inode); 1675 1661 if (f2fs_is_volatile_file(inode)) { 1676 1662 clear_inode_flag(inode, FI_VOLATILE_FILE); 1663 + stat_dec_volatile_write(inode); 1677 1664 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); 1678 1665 } 1679 1666 ··· 1856 1841 mutex_lock(&sbi->gc_mutex); 1857 1842 } 1858 1843 1859 - ret = f2fs_gc(sbi, sync, true); 1844 + ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); 1860 1845 out: 1861 1846 mnt_drop_write_file(filp); 1862 1847 return ret; ··· 1894 1879 pgoff_t pg_start, pg_end; 1895 1880 unsigned int blk_per_seg = sbi->blocks_per_seg; 1896 1881 unsigned int total = 0, sec_num; 1897 - unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; 1898 1882 block_t blk_end = 0; 1899 1883 bool fragmented = false; 1900 1884 int err; 1901 1885 1902 1886 /* if in-place-update policy is enabled, don't waste time here */ 1903 - if (need_inplace_update(inode)) 1887 + if (need_inplace_update_policy(inode, NULL)) 1904 1888 return -EINVAL; 1905 1889 1906 1890 pg_start = range->start >> PAGE_SHIFT; ··· 1957 1943 map.m_lblk = pg_start; 1958 1944 map.m_len = pg_end - pg_start; 1959 1945 1960 - sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; 1946 + sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); 1961 1947 1962 1948 /* 1963 1949 * make sure there are enough free section for LFS allocation, this can ··· 2034 2020 if (!capable(CAP_SYS_ADMIN)) 2035 2021 return -EPERM; 2036 2022 2037 - if (!S_ISREG(inode->i_mode)) 2023 + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) 2024 + return -EINVAL; 2025 + 2026 + if (f2fs_readonly(sbi->sb)) 2027 + return -EROFS; 2028 + 2029 + if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, 2030 + sizeof(range))) 2031 + return -EFAULT; 2032 + 2033 + /* verify alignment of offset & size */ 2034 + if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) 2035 + return -EINVAL; 2036 + 2037 + if (unlikely((range.start + range.len) >> PAGE_SHIFT > 2038 + sbi->max_file_blocks)) 2038 2039 return -EINVAL; 2039 2040 2040 2041 err = mnt_want_write_file(filp); 2041 2042 if (err) 2042 2043 return err; 2043 2044 2044 - if (f2fs_readonly(sbi->sb)) { 2045 - err = -EROFS; 2046 - goto out; 2047 - } 2048 - 2049 - if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, 2050 - sizeof(range))) { 2051 - err = -EFAULT; 2052 - goto out; 2053 - } 2054 - 2055 - /* verify alignment of offset & size */ 2056 - if (range.start & (F2FS_BLKSIZE - 1) || 2057 - range.len & (F2FS_BLKSIZE - 1)) { 2058 - err = -EINVAL; 2059 - goto out; 2060 - } 2061 - 2062 2045 err = f2fs_defragment_range(sbi, filp, &range); 2046 + mnt_drop_write_file(filp); 2047 + 2063 2048 f2fs_update_time(sbi, 
REQ_TIME); 2064 2049 if (err < 0) 2065 - goto out; 2050 + return err; 2066 2051 2067 2052 if (copy_to_user((struct f2fs_defragment __user *)arg, &range, 2068 2053 sizeof(range))) 2069 - err = -EFAULT; 2070 - out: 2071 - mnt_drop_write_file(filp); 2072 - return err; 2054 + return -EFAULT; 2055 + 2056 + return 0; 2073 2057 } 2074 2058 2075 2059 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, ··· 2201 2189 range.pos_out, range.len); 2202 2190 2203 2191 mnt_drop_write_file(filp); 2192 + if (err) 2193 + goto err_out; 2204 2194 2205 2195 if (copy_to_user((struct f2fs_move_range __user *)arg, 2206 2196 &range, sizeof(range))) ··· 2211 2197 fdput(dst); 2212 2198 return err; 2213 2199 } 2200 + 2201 + static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) 2202 + { 2203 + struct inode *inode = file_inode(filp); 2204 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2205 + struct sit_info *sm = SIT_I(sbi); 2206 + unsigned int start_segno = 0, end_segno = 0; 2207 + unsigned int dev_start_segno = 0, dev_end_segno = 0; 2208 + struct f2fs_flush_device range; 2209 + int ret; 2210 + 2211 + if (!capable(CAP_SYS_ADMIN)) 2212 + return -EPERM; 2213 + 2214 + if (f2fs_readonly(sbi->sb)) 2215 + return -EROFS; 2216 + 2217 + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, 2218 + sizeof(range))) 2219 + return -EFAULT; 2220 + 2221 + if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || 2222 + sbi->segs_per_sec != 1) { 2223 + f2fs_msg(sbi->sb, KERN_WARNING, 2224 + "Can't flush %u in %d for segs_per_sec %u != 1\n", 2225 + range.dev_num, sbi->s_ndevs, 2226 + sbi->segs_per_sec); 2227 + return -EINVAL; 2228 + } 2229 + 2230 + ret = mnt_want_write_file(filp); 2231 + if (ret) 2232 + return ret; 2233 + 2234 + if (range.dev_num != 0) 2235 + dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); 2236 + dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); 2237 + 2238 + start_segno = sm->last_victim[FLUSH_DEVICE]; 2239 + if (start_segno < dev_start_segno || start_segno >= dev_end_segno) 2240 + start_segno = dev_start_segno; 2241 + end_segno = min(start_segno + range.segments, dev_end_segno); 2242 + 2243 + while (start_segno < end_segno) { 2244 + if (!mutex_trylock(&sbi->gc_mutex)) { 2245 + ret = -EBUSY; 2246 + goto out; 2247 + } 2248 + sm->last_victim[GC_CB] = end_segno + 1; 2249 + sm->last_victim[GC_GREEDY] = end_segno + 1; 2250 + sm->last_victim[ALLOC_NEXT] = end_segno + 1; 2251 + ret = f2fs_gc(sbi, true, true, start_segno); 2252 + if (ret == -EAGAIN) 2253 + ret = 0; 2254 + else if (ret < 0) 2255 + break; 2256 + start_segno++; 2257 + } 2258 + out: 2259 + mnt_drop_write_file(filp); 2260 + return ret; 2261 + } 2262 + 2214 2263 2215 2264 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 2216 2265 { ··· 2312 2235 return f2fs_ioc_defragment(filp, arg); 2313 2236 case F2FS_IOC_MOVE_RANGE: 2314 2237 return f2fs_ioc_move_range(filp, arg); 2238 + case F2FS_IOC_FLUSH_DEVICE: 2239 + return f2fs_ioc_flush_device(filp, arg); 2315 2240 default: 2316 2241 return -ENOTTY; 2317 2242 } ··· 2381 2302 case F2FS_IOC_GARBAGE_COLLECT: 2382 2303 case F2FS_IOC_WRITE_CHECKPOINT: 2383 2304 case F2FS_IOC_DEFRAGMENT: 2384 - break; 2385 2305 case F2FS_IOC_MOVE_RANGE: 2306 + case F2FS_IOC_FLUSH_DEVICE: 2386 2307 break; 2387 2308 default: 2388 2309 return -ENOIOCTLCMD;
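f2fs_ioc_defragment() above is restructured so the cheap argument checks (read-only mount, copy_from_user, block alignment, range bounds) all run before mnt_want_write_file(), and the write reference is dropped before results are copied back out. A compilable sketch of that ordering under stand-in helpers (defrag_ioctl, copy_in/copy_out, want_write/drop_write are invented names, not the kernel API):

#include <errno.h>
#include <stdint.h>
#include <string.h>

struct defrag_args { uint64_t start, len; };

#define BLKSIZE 4096ULL

/* userspace stand-ins for copy_from_user()/copy_to_user() */
static int copy_in(struct defrag_args *d, const void *u) { memcpy(d, u, sizeof(*d)); return 0; }
static int copy_out(void *u, const struct defrag_args *d) { memcpy(u, d, sizeof(*d)); return 0; }

static int want_write(void) { return 0; }	/* mnt_want_write_file() stand-in */
static void drop_write(void) { }		/* mnt_drop_write_file() stand-in */
static int do_defrag(struct defrag_args *d) { (void)d; return 0; }

int defrag_ioctl(void *uarg, uint64_t max_blocks, int readonly)
{
	struct defrag_args range;
	int err;

	/* validate everything before taking write access */
	if (readonly)
		return -EROFS;
	if (copy_in(&range, uarg))
		return -EFAULT;
	if ((range.start | range.len) & (BLKSIZE - 1))
		return -EINVAL;
	if ((range.start + range.len) / BLKSIZE > max_blocks)
		return -EINVAL;

	err = want_write();
	if (err)
		return err;
	err = do_defrag(&range);
	drop_write();			/* released before copying results back */
	if (err < 0)
		return err;

	return copy_out(uarg, &range) ? -EFAULT : 0;
}

int main(void)
{
	struct defrag_args a = { .start = 4096, .len = 8192 };
	return defrag_ioctl(&a, 1 << 20, 0) ? 1 : 0;
}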
+61 -32
fs/f2fs/gc.c
··· 84 84 stat_inc_bggc_count(sbi); 85 85 86 86 /* if return value is not zero, no victim was selected */ 87 - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true)) 87 + if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) 88 88 wait_ms = gc_th->no_gc_sleep_time; 89 89 90 90 trace_f2fs_background_gc(sbi->sb, wait_ms, ··· 172 172 if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) 173 173 p->max_search = sbi->max_victim_search; 174 174 175 - p->offset = sbi->last_victim[p->gc_mode]; 175 + /* let's select beginning hot/small space first */ 176 + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) 177 + p->offset = 0; 178 + else 179 + p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; 176 180 } 177 181 178 182 static unsigned int get_max_cost(struct f2fs_sb_info *sbi, ··· 186 182 if (p->alloc_mode == SSR) 187 183 return sbi->blocks_per_seg; 188 184 if (p->gc_mode == GC_GREEDY) 189 - return sbi->blocks_per_seg * p->ofs_unit; 185 + return 2 * sbi->blocks_per_seg * p->ofs_unit; 190 186 else if (p->gc_mode == GC_CB) 191 187 return UINT_MAX; 192 188 else /* No other gc_mode */ ··· 211 207 continue; 212 208 213 209 clear_bit(secno, dirty_i->victim_secmap); 214 - return secno * sbi->segs_per_sec; 210 + return GET_SEG_FROM_SEC(sbi, secno); 215 211 } 216 212 return NULL_SEGNO; 217 213 } ··· 219 215 static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) 220 216 { 221 217 struct sit_info *sit_i = SIT_I(sbi); 222 - unsigned int secno = GET_SECNO(sbi, segno); 223 - unsigned int start = secno * sbi->segs_per_sec; 218 + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 219 + unsigned int start = GET_SEG_FROM_SEC(sbi, secno); 224 220 unsigned long long mtime = 0; 225 221 unsigned int vblocks; 226 222 unsigned char age = 0; ··· 229 225 230 226 for (i = 0; i < sbi->segs_per_sec; i++) 231 227 mtime += get_seg_entry(sbi, start + i)->mtime; 232 - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 228 + vblocks = get_valid_blocks(sbi, segno, true); 233 229 234 230 mtime = div_u64(mtime, sbi->segs_per_sec); 235 231 vblocks = div_u64(vblocks, sbi->segs_per_sec); ··· 252 248 unsigned int segno) 253 249 { 254 250 unsigned int valid_blocks = 255 - get_valid_blocks(sbi, segno, sbi->segs_per_sec); 251 + get_valid_blocks(sbi, segno, true); 256 252 257 253 return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? 
258 254 valid_blocks * 2 : valid_blocks; ··· 295 291 unsigned int *result, int gc_type, int type, char alloc_mode) 296 292 { 297 293 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 294 + struct sit_info *sm = SIT_I(sbi); 298 295 struct victim_sel_policy p; 299 296 unsigned int secno, last_victim; 300 297 unsigned int last_segment = MAIN_SEGS(sbi); ··· 309 304 p.min_segno = NULL_SEGNO; 310 305 p.min_cost = get_max_cost(sbi, &p); 311 306 307 + if (*result != NULL_SEGNO) { 308 + if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && 309 + get_valid_blocks(sbi, *result, false) && 310 + !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) 311 + p.min_segno = *result; 312 + goto out; 313 + } 314 + 312 315 if (p.max_search == 0) 313 316 goto out; 314 317 315 - last_victim = sbi->last_victim[p.gc_mode]; 318 + last_victim = sm->last_victim[p.gc_mode]; 316 319 if (p.alloc_mode == LFS && gc_type == FG_GC) { 317 320 p.min_segno = check_bg_victims(sbi); 318 321 if (p.min_segno != NULL_SEGNO) ··· 333 320 334 321 segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); 335 322 if (segno >= last_segment) { 336 - if (sbi->last_victim[p.gc_mode]) { 337 - last_segment = sbi->last_victim[p.gc_mode]; 338 - sbi->last_victim[p.gc_mode] = 0; 323 + if (sm->last_victim[p.gc_mode]) { 324 + last_segment = 325 + sm->last_victim[p.gc_mode]; 326 + sm->last_victim[p.gc_mode] = 0; 339 327 p.offset = 0; 340 328 continue; 341 329 } ··· 353 339 nsearched++; 354 340 } 355 341 356 - secno = GET_SECNO(sbi, segno); 342 + secno = GET_SEC_FROM_SEG(sbi, segno); 357 343 358 344 if (sec_usage_check(sbi, secno)) 359 345 goto next; ··· 371 357 } 372 358 next: 373 359 if (nsearched >= p.max_search) { 374 - if (!sbi->last_victim[p.gc_mode] && segno <= last_victim) 375 - sbi->last_victim[p.gc_mode] = last_victim + 1; 360 + if (!sm->last_victim[p.gc_mode] && segno <= last_victim) 361 + sm->last_victim[p.gc_mode] = last_victim + 1; 376 362 else 377 - sbi->last_victim[p.gc_mode] = segno + 1; 363 + sm->last_victim[p.gc_mode] = segno + 1; 364 + sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); 378 365 break; 379 366 } 380 367 } 381 368 if (p.min_segno != NULL_SEGNO) { 382 369 got_it: 383 370 if (p.alloc_mode == LFS) { 384 - secno = GET_SECNO(sbi, p.min_segno); 371 + secno = GET_SEC_FROM_SEG(sbi, p.min_segno); 385 372 if (gc_type == FG_GC) 386 373 sbi->cur_victim_sec = secno; 387 374 else ··· 565 550 get_node_info(sbi, nid, dni); 566 551 567 552 if (sum->version != dni->version) { 568 - f2fs_put_page(node_page, 1); 569 - return false; 553 + f2fs_msg(sbi->sb, KERN_WARNING, 554 + "%s: valid data with mismatched node version.", 555 + __func__); 556 + set_sbi_flag(sbi, SBI_NEED_FSCK); 570 557 } 571 558 572 559 *nofs = ofs_of_node(node_page); ··· 714 697 .type = DATA, 715 698 .op = REQ_OP_WRITE, 716 699 .op_flags = REQ_SYNC, 700 + .old_blkaddr = NULL_ADDR, 717 701 .page = page, 718 702 .encrypted_page = NULL, 703 + .need_lock = true, 719 704 }; 720 705 bool is_dirty = PageDirty(page); 721 706 int err; ··· 909 890 GET_SUM_BLOCK(sbi, segno)); 910 891 f2fs_put_page(sum_page, 0); 911 892 912 - if (get_valid_blocks(sbi, segno, 1) == 0 || 893 + if (get_valid_blocks(sbi, segno, false) == 0 || 913 894 !PageUptodate(sum_page) || 914 895 unlikely(f2fs_cp_error(sbi))) 915 896 goto next; ··· 924 905 * - mutex_lock(sentry_lock) - change_curseg() 925 906 * - lock_page(sum_page) 926 907 */ 927 - 928 908 if (type == SUM_TYPE_NODE) 929 909 gc_node_segment(sbi, sum->entries, segno, gc_type); 930 910 else ··· 942 924 blk_finish_plug(&plug); 943 925 944 926 if 
(gc_type == FG_GC && 945 - get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0) 927 + get_valid_blocks(sbi, start_segno, true) == 0) 946 928 sec_freed = 1; 947 929 948 930 stat_inc_call_count(sbi->stat_info); ··· 950 932 return sec_freed; 951 933 } 952 934 953 - int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) 935 + int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, 936 + bool background, unsigned int segno) 954 937 { 955 - unsigned int segno; 956 938 int gc_type = sync ? FG_GC : BG_GC; 957 939 int sec_freed = 0; 958 940 int ret = -EINVAL; 959 941 struct cp_control cpc; 942 + unsigned int init_segno = segno; 960 943 struct gc_inode_list gc_list = { 961 944 .ilist = LIST_HEAD_INIT(gc_list.ilist), 962 945 .iroot = RADIX_TREE_INIT(GFP_NOFS), ··· 978 959 * threshold, we can make them free by checkpoint. Then, we 979 960 * secure free segments which doesn't need fggc any more. 980 961 */ 981 - ret = write_checkpoint(sbi, &cpc); 982 - if (ret) 983 - goto stop; 962 + if (prefree_segments(sbi)) { 963 + ret = write_checkpoint(sbi, &cpc); 964 + if (ret) 965 + goto stop; 966 + } 984 967 if (has_not_enough_free_secs(sbi, 0, 0)) 985 968 gc_type = FG_GC; 986 969 } ··· 1002 981 sbi->cur_victim_sec = NULL_SEGNO; 1003 982 1004 983 if (!sync) { 1005 - if (has_not_enough_free_secs(sbi, sec_freed, 0)) 984 + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { 985 + segno = NULL_SEGNO; 1006 986 goto gc_more; 987 + } 1007 988 1008 989 if (gc_type == FG_GC) 1009 990 ret = write_checkpoint(sbi, &cpc); 1010 991 } 1011 992 stop: 993 + SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; 994 + SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno; 1012 995 mutex_unlock(&sbi->gc_mutex); 1013 996 1014 997 put_gc_inode(&gc_list); ··· 1024 999 1025 1000 void build_gc_manager(struct f2fs_sb_info *sbi) 1026 1001 { 1027 - u64 main_count, resv_count, ovp_count, blocks_per_sec; 1002 + u64 main_count, resv_count, ovp_count; 1028 1003 1029 1004 DIRTY_I(sbi)->v_ops = &default_v_ops; 1030 1005 ··· 1032 1007 main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; 1033 1008 resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; 1034 1009 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; 1035 - blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; 1036 1010 1037 - sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec, 1038 - (main_count - resv_count)); 1011 + sbi->fggc_threshold = div64_u64((main_count - ovp_count) * 1012 + BLKS_PER_SEC(sbi), (main_count - resv_count)); 1013 + 1014 + /* give warm/cold data area from slower device */ 1015 + if (sbi->s_ndevs && sbi->segs_per_sec == 1) 1016 + SIT_I(sbi)->last_victim[ALLOC_NEXT] = 1017 + GET_SEGNO(sbi, FDEV(0).end_blk) + 1; 1039 1018 }
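get_victim_by_default() above moves the scan cursor into struct sit_info and now keeps it inside [0, MAIN_SEGS()) by taking it modulo the segment count. The scan itself is circular: resume from the saved offset, and wrap once to rescan the front if the end is reached. A standalone model of that cursor behavior (toy bitmap and names, not the kernel's find_next_bit machinery):

#include <stdbool.h>
#include <stdio.h>

#define NSEGS 16

static unsigned last_victim;	/* persists across calls, like sm->last_victim */

static int pick_victim(const bool dirty[NSEGS])
{
	unsigned start = last_victim, limit = NSEGS;
	unsigned segno = start;
	bool wrapped = false;

	for (;;) {
		while (segno < limit && !dirty[segno])
			segno++;
		if (segno < limit) {
			last_victim = (segno + 1) % NSEGS;	/* stay in range */
			return (int)segno;
		}
		if (wrapped || !start)
			return -1;	/* scanned every segment */
		limit = start;		/* wrap: rescan [0, start) once */
		segno = 0;
		wrapped = true;
	}
}

int main(void)
{
	bool dirty[NSEGS] = { [2] = true, [5] = true, [14] = true };

	last_victim = 6;	/* pretend a previous scan stopped here */
	printf("%d\n", pick_victim(dirty));	/* 14 */
	printf("%d\n", pick_victim(dirty));	/* wraps to the front: 2 */
	printf("%d\n", pick_victim(dirty));	/* 5 */
	return 0;
}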
+17 -17
fs/f2fs/inline.c
··· 63 63 SetPageUptodate(page); 64 64 } 65 65 66 - bool truncate_inline_inode(struct page *ipage, u64 from) 66 + void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) 67 67 { 68 68 void *addr; 69 69 70 70 if (from >= MAX_INLINE_DATA) 71 - return false; 71 + return; 72 72 73 73 addr = inline_data_addr(ipage); 74 74 75 75 f2fs_wait_on_page_writeback(ipage, NODE, true); 76 76 memset(addr + from, 0, MAX_INLINE_DATA - from); 77 77 set_page_dirty(ipage); 78 - return true; 78 + 79 + if (from == 0) 80 + clear_inode_flag(inode, FI_DATA_EXIST); 79 81 } 80 82 81 83 int f2fs_read_inline_data(struct inode *inode, struct page *page) ··· 137 135 /* write data page to try to make data consistent */ 138 136 set_page_writeback(page); 139 137 fio.old_blkaddr = dn->data_blkaddr; 138 + set_inode_flag(dn->inode, FI_HOT_DATA); 140 139 write_data_page(dn, &fio); 141 140 f2fs_wait_on_page_writeback(page, DATA, true); 142 141 if (dirty) { ··· 149 146 set_inode_flag(dn->inode, FI_APPEND_WRITE); 150 147 151 148 /* clear inline data and flag after data writeback */ 152 - truncate_inline_inode(dn->inode_page, 0); 149 + truncate_inline_inode(dn->inode, dn->inode_page, 0); 153 150 clear_inline_node(dn->inode_page); 154 151 clear_out: 155 152 stat_dec_inline_inode(dn->inode); 156 - f2fs_clear_inline_inode(dn->inode); 153 + clear_inode_flag(dn->inode, FI_INLINE_DATA); 157 154 f2fs_put_dnode(dn); 158 155 return 0; 159 156 } ··· 270 267 if (f2fs_has_inline_data(inode)) { 271 268 ipage = get_node_page(sbi, inode->i_ino); 272 269 f2fs_bug_on(sbi, IS_ERR(ipage)); 273 - if (!truncate_inline_inode(ipage, 0)) 274 - return false; 275 - f2fs_clear_inline_inode(inode); 270 + truncate_inline_inode(inode, ipage, 0); 271 + clear_inode_flag(inode, FI_INLINE_DATA); 276 272 f2fs_put_page(ipage, 1); 277 273 } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { 278 274 if (truncate_blocks(inode, 0, false)) ··· 302 300 303 301 inline_dentry = inline_data_addr(ipage); 304 302 305 - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); 303 + make_dentry_ptr_inline(NULL, &d, inline_dentry); 306 304 de = find_target_dentry(fname, namehash, NULL, &d); 307 305 unlock_page(ipage); 308 306 if (de) ··· 321 319 322 320 dentry_blk = inline_data_addr(ipage); 323 321 324 - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); 322 + make_dentry_ptr_inline(NULL, &d, dentry_blk); 325 323 do_make_empty_dir(inode, parent, &d); 326 324 327 325 set_page_dirty(ipage); ··· 382 380 set_page_dirty(page); 383 381 384 382 /* clear inline dir and flag after data writeback */ 385 - truncate_inline_inode(ipage, 0); 383 + truncate_inline_inode(dir, ipage, 0); 386 384 387 385 stat_dec_inline_dir(dir); 388 386 clear_inode_flag(dir, FI_INLINE_DENTRY); ··· 402 400 unsigned long bit_pos = 0; 403 401 int err = 0; 404 402 405 - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); 403 + make_dentry_ptr_inline(NULL, &d, inline_dentry); 406 404 407 405 while (bit_pos < d.max) { 408 406 struct f2fs_dir_entry *de; ··· 457 455 } 458 456 459 457 memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); 460 - truncate_inline_inode(ipage, 0); 458 + truncate_inline_inode(dir, ipage, 0); 461 459 462 460 unlock_page(ipage); 463 461 ··· 529 527 err = PTR_ERR(page); 530 528 goto fail; 531 529 } 532 - if (f2fs_encrypted_inode(dir)) 533 - file_set_enc_name(inode); 534 530 } 535 531 536 532 f2fs_wait_on_page_writeback(ipage, NODE, true); 537 533 538 534 name_hash = f2fs_dentry_hash(new_name, NULL); 539 - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); 535 + 
make_dentry_ptr_inline(NULL, &d, dentry_blk); 540 536 f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); 541 537 542 538 set_page_dirty(ipage); ··· 623 623 624 624 inline_dentry = inline_data_addr(ipage); 625 625 626 - make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); 626 + make_dentry_ptr_inline(inode, &d, inline_dentry); 627 627 628 628 err = f2fs_fill_dentries(ctx, &d, 0, fstr); 629 629 if (!err)
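truncate_inline_inode() now receives the inode and clears the data-exists state itself when the whole inline area is wiped (from == 0), which is why the callers above lose their manual clear_inode_flag()/f2fs_clear_inline_inode() steps. A toy model of folding the flag into the helper (struct toy_inode and its fields are invented for the sketch):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_INLINE_DATA 32

struct toy_inode {
	unsigned char inline_data[MAX_INLINE_DATA];
	bool data_exist;	/* stands in for FI_DATA_EXIST */
};

static void truncate_inline(struct toy_inode *inode, size_t from)
{
	if (from >= MAX_INLINE_DATA)
		return;
	memset(inode->inline_data + from, 0, MAX_INLINE_DATA - from);
	if (from == 0)		/* previously every caller did this by hand */
		inode->data_exist = false;
}

int main(void)
{
	struct toy_inode ino = { .data_exist = true };

	memset(ino.inline_data, 0xab, sizeof(ino.inline_data));
	truncate_inline(&ino, 8);	/* partial wipe: flag stays set */
	printf("exists=%d\n", ino.data_exist);
	truncate_inline(&ino, 0);	/* full wipe: flag cleared by helper */
	printf("exists=%d\n", ino.data_exist);
	return 0;
}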
+13 -10
fs/f2fs/inode.c
··· 316 316 } else if (err != -ENOENT) { 317 317 f2fs_stop_checkpoint(sbi, false); 318 318 } 319 - f2fs_inode_synced(inode); 320 319 return 0; 321 320 } 322 321 ret = update_inode(inode, node_page); ··· 338 339 * We need to balance fs here to prevent from producing dirty node pages 339 340 * during the urgent cleaning time when runing out of free sections. 340 341 */ 341 - if (update_inode_page(inode) && wbc && wbc->nr_to_write) 342 + update_inode_page(inode); 343 + if (wbc && wbc->nr_to_write) 342 344 f2fs_balance_fs(sbi, true); 343 345 return 0; 344 346 } ··· 372 372 if (inode->i_nlink || is_bad_inode(inode)) 373 373 goto no_delete; 374 374 375 - #ifdef CONFIG_F2FS_FAULT_INJECTION 376 - if (time_to_inject(sbi, FAULT_EVICT_INODE)) { 377 - f2fs_show_injection_info(FAULT_EVICT_INODE); 378 - goto no_delete; 379 - } 380 - #endif 381 - 382 375 remove_ino_entry(sbi, inode->i_ino, APPEND_INO); 383 376 remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); 384 377 ··· 382 389 if (F2FS_HAS_BLOCKS(inode)) 383 390 err = f2fs_truncate(inode); 384 391 392 + #ifdef CONFIG_F2FS_FAULT_INJECTION 393 + if (time_to_inject(sbi, FAULT_EVICT_INODE)) { 394 + f2fs_show_injection_info(FAULT_EVICT_INODE); 395 + err = -EIO; 396 + } 397 + #endif 385 398 if (!err) { 386 399 f2fs_lock_op(sbi); 387 400 err = remove_inode_page(inode); ··· 410 411 stat_dec_inline_dir(inode); 411 412 stat_dec_inline_inode(inode); 412 413 413 - invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 414 + /* ino == 0, if f2fs_new_inode() was failed t*/ 415 + if (inode->i_ino) 416 + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, 417 + inode->i_ino); 414 418 if (xnid) 415 419 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 416 420 if (inode->i_nlink) { ··· 450 448 * in a panic when flushing dirty inodes in gdirty_list. 451 449 */ 452 450 update_inode_page(inode); 451 + f2fs_inode_synced(inode); 453 452 454 453 /* don't make bad inode, since it becomes a regular file. */ 455 454 unlock_new_inode(inode);
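The inode.c hunk moves the FAULT_EVICT_INODE injection point to after truncation and makes it set err = -EIO rather than skipping eviction outright, so the downstream error path actually gets exercised. A userspace sketch of interval-style fault injection in that spirit (struct fault_attr and its fields are invented here, not the kernel's implementation):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fault_attr {
	unsigned interval;	/* fire once per this many checks */
	unsigned count;
};

static bool time_to_inject(struct fault_attr *f)
{
	if (!f->interval)
		return false;
	if (++f->count < f->interval)
		return false;
	f->count = 0;
	return true;
}

int main(void)
{
	struct fault_attr evict_fault = { .interval = 3 };

	for (int i = 0; i < 7; i++) {
		int err = 0;

		/* ... truncate blocks first, as the real evict path now does ... */
		if (time_to_inject(&evict_fault))
			err = -EIO;	/* force the error branch */
		printf("evict %d -> err=%d\n", i, err);
	}
	return 0;
}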
+12 -41
fs/f2fs/namei.c
··· 148 148 inode->i_mapping->a_ops = &f2fs_dblock_aops; 149 149 ino = inode->i_ino; 150 150 151 - f2fs_balance_fs(sbi, true); 152 - 153 151 f2fs_lock_op(sbi); 154 152 err = f2fs_add_link(dentry, inode); 155 153 if (err) ··· 161 163 162 164 if (IS_DIRSYNC(dir)) 163 165 f2fs_sync_fs(sbi->sb, 1); 166 + 167 + f2fs_balance_fs(sbi, true); 164 168 return 0; 165 169 out: 166 170 handle_failed_inode(inode); ··· 424 424 inode_nohighmem(inode); 425 425 inode->i_mapping->a_ops = &f2fs_dblock_aops; 426 426 427 - f2fs_balance_fs(sbi, true); 428 - 429 427 f2fs_lock_op(sbi); 430 428 err = f2fs_add_link(dentry, inode); 431 429 if (err) ··· 486 488 } 487 489 488 490 kfree(sd); 491 + 492 + f2fs_balance_fs(sbi, true); 489 493 return err; 490 494 out: 491 495 handle_failed_inode(inode); ··· 509 509 inode->i_mapping->a_ops = &f2fs_dblock_aops; 510 510 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); 511 511 512 - f2fs_balance_fs(sbi, true); 513 - 514 512 set_inode_flag(inode, FI_INC_LINK); 515 513 f2fs_lock_op(sbi); 516 514 err = f2fs_add_link(dentry, inode); ··· 523 525 524 526 if (IS_DIRSYNC(dir)) 525 527 f2fs_sync_fs(sbi->sb, 1); 528 + 529 + f2fs_balance_fs(sbi, true); 526 530 return 0; 527 531 528 532 out_fail: ··· 555 555 init_special_inode(inode, inode->i_mode, rdev); 556 556 inode->i_op = &f2fs_special_inode_operations; 557 557 558 - f2fs_balance_fs(sbi, true); 559 - 560 558 f2fs_lock_op(sbi); 561 559 err = f2fs_add_link(dentry, inode); 562 560 if (err) ··· 568 570 569 571 if (IS_DIRSYNC(dir)) 570 572 f2fs_sync_fs(sbi->sb, 1); 573 + 574 + f2fs_balance_fs(sbi, true); 571 575 return 0; 572 576 out: 573 577 handle_failed_inode(inode); ··· 596 596 inode->i_mapping->a_ops = &f2fs_dblock_aops; 597 597 } 598 598 599 - f2fs_balance_fs(sbi, true); 600 - 601 599 f2fs_lock_op(sbi); 602 600 err = acquire_orphan_inode(sbi); 603 601 if (err) ··· 621 623 /* link_count was changed by d_tmpfile as well. */ 622 624 f2fs_unlock_op(sbi); 623 625 unlock_new_inode(inode); 626 + 627 + f2fs_balance_fs(sbi, true); 624 628 return 0; 625 629 626 630 release_out: ··· 721 721 if (err) 722 722 goto put_out_dir; 723 723 724 - err = update_dent_inode(old_inode, new_inode, 725 - &new_dentry->d_name); 726 - if (err) { 727 - release_orphan_inode(sbi); 728 - goto put_out_dir; 729 - } 730 - 731 724 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 732 725 733 726 new_inode->i_ctime = current_time(new_inode); ··· 773 780 774 781 down_write(&F2FS_I(old_inode)->i_sem); 775 782 file_lost_pino(old_inode); 776 - if (new_inode && file_enc_name(new_inode)) 777 - file_set_enc_name(old_inode); 778 783 up_write(&F2FS_I(old_inode)->i_sem); 779 784 780 785 old_inode->i_ctime = current_time(old_inode); ··· 900 909 old_nlink = old_dir_entry ? 
-1 : 1; 901 910 new_nlink = -old_nlink; 902 911 err = -EMLINK; 903 - if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || 904 - (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) 912 + if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) || 913 + (new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX)) 905 914 goto out_new_dir; 906 915 } 907 916 908 917 f2fs_balance_fs(sbi, true); 909 918 910 919 f2fs_lock_op(sbi); 911 - 912 - err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); 913 - if (err) 914 - goto out_unlock; 915 - if (file_enc_name(new_inode)) 916 - file_set_enc_name(old_inode); 917 - 918 - err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name); 919 - if (err) 920 - goto out_undo; 921 - if (file_enc_name(old_inode)) 922 - file_set_enc_name(new_inode); 923 920 924 921 /* update ".." directory entry info of old dentry */ 925 922 if (old_dir_entry) ··· 952 973 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 953 974 f2fs_sync_fs(sbi->sb, 1); 954 975 return 0; 955 - out_undo: 956 - /* 957 - * Still we may fail to recover name info of f2fs_inode here 958 - * Drop it, once its name is set as encrypted 959 - */ 960 - update_dent_inode(old_inode, old_inode, &old_dentry->d_name); 961 - out_unlock: 962 - f2fs_unlock_op(sbi); 963 976 out_new_dir: 964 977 if (new_dir_entry) { 965 978 f2fs_dentry_kunmap(new_inode, new_dir_page);
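A pattern repeated through namei.c above: f2fs_balance_fs(sbi, true) moves from before the f2fs_lock_op() transaction to after it completes, so a create/mkdir/symlink/mknod is no longer throttled up front; presumably any GC or writeback the balance triggers now runs once the new entry is already in place. A schematic of the reordering (pthread mutex and helper names are stand-ins, not the kernel API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t op_lock = PTHREAD_MUTEX_INITIALIZER;

static void balance_fs(void)  { puts("balance: maybe run GC/checkpoint"); }
static int  add_link(int ino) { printf("link inode %d\n", ino); return 0; }

static int create(int ino)
{
	int err;

	pthread_mutex_lock(&op_lock);	/* f2fs_lock_op() stand-in */
	err = add_link(ino);
	pthread_mutex_unlock(&op_lock);
	if (err)
		return err;

	/* moved here from before the lock: rebalance only after the
	 * user-visible operation has completed */
	balance_fs();
	return 0;
}

int main(void)
{
	return create(42);
}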
+86 -58
fs/f2fs/node.c
··· 22 22 #include "trace.h" 23 23 #include <trace/events/f2fs.h> 24 24 25 - #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) 25 + #define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) 26 26 27 27 static struct kmem_cache *nat_entry_slab; 28 28 static struct kmem_cache *free_nid_slab; ··· 63 63 int i; 64 64 65 65 for (i = 0; i <= UPDATE_INO; i++) 66 - mem_size += (sbi->im[i].ino_num * 67 - sizeof(struct ino_entry)) >> PAGE_SHIFT; 66 + mem_size += sbi->im[i].ino_num * 67 + sizeof(struct ino_entry); 68 + mem_size >>= PAGE_SHIFT; 68 69 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 69 70 } else if (type == EXTENT_CACHE) { 70 71 mem_size = (atomic_read(&sbi->total_ext_tree) * ··· 178 177 } 179 178 180 179 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, 181 - struct nat_entry *ne) 180 + struct nat_entry_set *set, struct nat_entry *ne) 182 181 { 183 - nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); 184 - struct nat_entry_set *head; 185 - 186 - head = radix_tree_lookup(&nm_i->nat_set_root, set); 187 - if (head) { 188 - list_move_tail(&ne->list, &nm_i->nat_entries); 189 - set_nat_flag(ne, IS_DIRTY, false); 190 - head->entry_cnt--; 191 - nm_i->dirty_nat_cnt--; 192 - } 182 + list_move_tail(&ne->list, &nm_i->nat_entries); 183 + set_nat_flag(ne, IS_DIRTY, false); 184 + set->entry_cnt--; 185 + nm_i->dirty_nat_cnt--; 193 186 } 194 187 195 188 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, ··· 376 381 struct page *page = NULL; 377 382 struct f2fs_nat_entry ne; 378 383 struct nat_entry *e; 384 + pgoff_t index; 379 385 int i; 380 386 381 387 ni->nid = nid; ··· 402 406 node_info_from_raw_nat(ni, &ne); 403 407 } 404 408 up_read(&curseg->journal_rwsem); 405 - if (i >= 0) 409 + if (i >= 0) { 410 + up_read(&nm_i->nat_tree_lock); 406 411 goto cache; 412 + } 407 413 408 414 /* Fill node_info from nat page */ 409 - page = get_current_nat_page(sbi, start_nid); 415 + index = current_nat_addr(sbi, nid); 416 + up_read(&nm_i->nat_tree_lock); 417 + 418 + page = get_meta_page(sbi, index); 410 419 nat_blk = (struct f2fs_nat_block *)page_address(page); 411 420 ne = nat_blk->entries[nid - start_nid]; 412 421 node_info_from_raw_nat(ni, &ne); 413 422 f2fs_put_page(page, 1); 414 423 cache: 415 - up_read(&nm_i->nat_tree_lock); 416 424 /* cache nat entry */ 417 425 down_write(&nm_i->nat_tree_lock); 418 426 cache_nat_entry(sbi, nid, &ne); ··· 1463 1463 f2fs_wait_on_page_writeback(page, NODE, true); 1464 1464 BUG_ON(PageWriteback(page)); 1465 1465 1466 + set_fsync_mark(page, 0); 1467 + set_dentry_mark(page, 0); 1468 + 1466 1469 if (!atomic || page == last_page) { 1467 1470 set_fsync_mark(page, 1); 1468 1471 if (IS_INODE(page)) { ··· 1769 1766 static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) 1770 1767 { 1771 1768 struct f2fs_nm_info *nm_i = NM_I(sbi); 1772 - struct free_nid *i; 1769 + struct free_nid *i, *e; 1773 1770 struct nat_entry *ne; 1774 - int err; 1771 + int err = -EINVAL; 1772 + bool ret = false; 1775 1773 1776 1774 /* 0 nid should not be used */ 1777 1775 if (unlikely(nid == 0)) 1778 1776 return false; 1779 1777 1780 - if (build) { 1781 - /* do not add allocated nids */ 1782 - ne = __lookup_nat_cache(nm_i, nid); 1783 - if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1784 - nat_get_blkaddr(ne) != NULL_ADDR)) 1785 - return false; 1786 - } 1787 - 1788 1778 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1789 1779 i->nid = nid; 1790 1780 i->state = NID_NEW; 1791 1781 1792 - if (radix_tree_preload(GFP_NOFS)) { 1793 - 
kmem_cache_free(free_nid_slab, i); 1794 - return true; 1795 - } 1782 + if (radix_tree_preload(GFP_NOFS)) 1783 + goto err; 1796 1784 1797 1785 spin_lock(&nm_i->nid_list_lock); 1786 + 1787 + if (build) { 1788 + /* 1789 + * Thread A Thread B 1790 + * - f2fs_create 1791 + * - f2fs_new_inode 1792 + * - alloc_nid 1793 + * - __insert_nid_to_list(ALLOC_NID_LIST) 1794 + * - f2fs_balance_fs_bg 1795 + * - build_free_nids 1796 + * - __build_free_nids 1797 + * - scan_nat_page 1798 + * - add_free_nid 1799 + * - __lookup_nat_cache 1800 + * - f2fs_add_link 1801 + * - init_inode_metadata 1802 + * - new_inode_page 1803 + * - new_node_page 1804 + * - set_node_addr 1805 + * - alloc_nid_done 1806 + * - __remove_nid_from_list(ALLOC_NID_LIST) 1807 + * - __insert_nid_to_list(FREE_NID_LIST) 1808 + */ 1809 + ne = __lookup_nat_cache(nm_i, nid); 1810 + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1811 + nat_get_blkaddr(ne) != NULL_ADDR)) 1812 + goto err_out; 1813 + 1814 + e = __lookup_free_nid_list(nm_i, nid); 1815 + if (e) { 1816 + if (e->state == NID_NEW) 1817 + ret = true; 1818 + goto err_out; 1819 + } 1820 + } 1821 + ret = true; 1798 1822 err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); 1823 + err_out: 1799 1824 spin_unlock(&nm_i->nid_list_lock); 1800 1825 radix_tree_preload_end(); 1801 - if (err) { 1826 + err: 1827 + if (err) 1802 1828 kmem_cache_free(free_nid_slab, i); 1803 - return true; 1804 - } 1805 - return true; 1829 + return ret; 1806 1830 } 1807 1831 1808 1832 static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) ··· 1851 1821 } 1852 1822 1853 1823 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, 1854 - bool set, bool build, bool locked) 1824 + bool set, bool build) 1855 1825 { 1856 1826 struct f2fs_nm_info *nm_i = NM_I(sbi); 1857 1827 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); ··· 1865 1835 else 1866 1836 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); 1867 1837 1868 - if (!locked) 1869 - spin_lock(&nm_i->free_nid_lock); 1870 1838 if (set) 1871 1839 nm_i->free_nid_count[nat_ofs]++; 1872 1840 else if (!build) 1873 1841 nm_i->free_nid_count[nat_ofs]--; 1874 - if (!locked) 1875 - spin_unlock(&nm_i->free_nid_lock); 1876 1842 } 1877 1843 1878 1844 static void scan_nat_page(struct f2fs_sb_info *sbi, ··· 1897 1871 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1898 1872 if (blk_addr == NULL_ADDR) 1899 1873 freed = add_free_nid(sbi, start_nid, true); 1900 - update_free_nid_bitmap(sbi, start_nid, freed, true, false); 1874 + spin_lock(&NM_I(sbi)->nid_list_lock); 1875 + update_free_nid_bitmap(sbi, start_nid, freed, true); 1876 + spin_unlock(&NM_I(sbi)->nid_list_lock); 1901 1877 } 1902 1878 } 1903 1879 ··· 1954 1926 struct f2fs_journal *journal = curseg->journal; 1955 1927 int i = 0; 1956 1928 nid_t nid = nm_i->next_scan_nid; 1929 + 1930 + if (unlikely(nid >= nm_i->max_nid)) 1931 + nid = 0; 1957 1932 1958 1933 /* Enough entries */ 1959 1934 if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) ··· 2057 2026 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); 2058 2027 nm_i->available_nids--; 2059 2028 2060 - update_free_nid_bitmap(sbi, *nid, false, false, false); 2029 + update_free_nid_bitmap(sbi, *nid, false, false); 2061 2030 2062 2031 spin_unlock(&nm_i->nid_list_lock); 2063 2032 return true; ··· 2113 2082 2114 2083 nm_i->available_nids++; 2115 2084 2116 - update_free_nid_bitmap(sbi, nid, true, false, false); 2085 + update_free_nid_bitmap(sbi, nid, true, false); 2117 2086 2118 2087 spin_unlock(&nm_i->nid_list_lock); 2119 2088 ··· 2438 2407 } 2439 2408 
raw_nat_from_node_info(raw_ne, &ne->ni); 2440 2409 nat_reset_flag(ne); 2441 - __clear_nat_cache_dirty(NM_I(sbi), ne); 2410 + __clear_nat_cache_dirty(NM_I(sbi), set, ne); 2442 2411 if (nat_get_blkaddr(ne) == NULL_ADDR) { 2443 2412 add_free_nid(sbi, nid, false); 2444 2413 spin_lock(&NM_I(sbi)->nid_list_lock); 2445 2414 NM_I(sbi)->available_nids++; 2446 - update_free_nid_bitmap(sbi, nid, true, false, false); 2415 + update_free_nid_bitmap(sbi, nid, true, false); 2447 2416 spin_unlock(&NM_I(sbi)->nid_list_lock); 2448 2417 } else { 2449 2418 spin_lock(&NM_I(sbi)->nid_list_lock); 2450 - update_free_nid_bitmap(sbi, nid, false, false, false); 2419 + update_free_nid_bitmap(sbi, nid, false, false); 2451 2420 spin_unlock(&NM_I(sbi)->nid_list_lock); 2452 2421 } 2453 2422 } ··· 2459 2428 f2fs_put_page(page, 1); 2460 2429 } 2461 2430 2462 - f2fs_bug_on(sbi, set->entry_cnt); 2463 - 2464 - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 2465 - kmem_cache_free(nat_entry_set_slab, set); 2431 + /* Allow dirty nats by node block allocation in write_begin */ 2432 + if (!set->entry_cnt) { 2433 + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 2434 + kmem_cache_free(nat_entry_set_slab, set); 2435 + } 2466 2436 } 2467 2437 2468 2438 /* ··· 2508 2476 __flush_nat_entry_set(sbi, set, cpc); 2509 2477 2510 2478 up_write(&nm_i->nat_tree_lock); 2511 - 2512 - f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2479 + /* Allow dirty nats by node block allocation in write_begin */ 2513 2480 } 2514 2481 2515 2482 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) ··· 2572 2541 nid = i * NAT_ENTRY_PER_BLOCK; 2573 2542 last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; 2574 2543 2575 - spin_lock(&nm_i->free_nid_lock); 2544 + spin_lock(&NM_I(sbi)->nid_list_lock); 2576 2545 for (; nid < last_nid; nid++) 2577 - update_free_nid_bitmap(sbi, nid, true, true, true); 2578 - spin_unlock(&nm_i->free_nid_lock); 2546 + update_free_nid_bitmap(sbi, nid, true, true); 2547 + spin_unlock(&NM_I(sbi)->nid_list_lock); 2579 2548 } 2580 2549 2581 2550 for (i = 0; i < nm_i->nat_blocks; i++) { ··· 2666 2635 sizeof(unsigned short), GFP_KERNEL); 2667 2636 if (!nm_i->free_nid_count) 2668 2637 return -ENOMEM; 2669 - 2670 - spin_lock_init(&nm_i->free_nid_lock); 2671 - 2672 2638 return 0; 2673 2639 } 2674 2640
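The add_free_nid() rework above is the classic preallocate-then-double-check pattern: the free_nid entry is allocated outside nid_list_lock, then both the NAT cache and the free list are re-checked under the lock (the Thread A / Thread B comment in the hunk spells out the race), and the preallocated entry is freed on any collision. A userspace model with a toy array in place of the radix tree and NAT cache (all names invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_NID 64

static pthread_mutex_t nid_lock = PTHREAD_MUTEX_INITIALIZER;
static bool allocated[MAX_NID];		/* nid already handed out elsewhere */
static void *free_list[MAX_NID];	/* free-nid entries, NULL if absent */

static bool add_free_nid(unsigned nid)
{
	void *entry;
	bool ret = false;

	if (nid == 0 || nid >= MAX_NID)
		return false;

	entry = malloc(16);		/* preallocate outside the lock */
	if (!entry)
		return false;

	pthread_mutex_lock(&nid_lock);
	if (allocated[nid])		/* raced with an in-flight allocation */
		goto out_free;
	if (free_list[nid]) {		/* someone already added this nid */
		ret = true;
		goto out_free;
	}
	free_list[nid] = entry;		/* insert under the lock */
	ret = true;
	pthread_mutex_unlock(&nid_lock);
	return ret;

out_free:
	pthread_mutex_unlock(&nid_lock);
	free(entry);			/* collision: drop the preallocation */
	return ret;
}

int main(void)
{
	allocated[7] = true;
	printf("%d %d %d\n", add_free_nid(5), add_free_nid(5), add_free_nid(7));
	return 0;	/* prints "1 1 0" */
}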
+17 -14
fs/f2fs/node.h
··· 9 9 * published by the Free Software Foundation. 10 10 */ 11 11 /* start node id of a node block dedicated to the given node id */ 12 - #define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) 12 + #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) 13 13 14 14 /* node block offset on the NAT area dedicated to the given start node id */ 15 - #define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) 15 + #define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK) 16 16 17 17 /* # of pages to perform synchronous readahead before building free nids */ 18 18 #define FREE_NID_PAGES 8 ··· 62 62 struct node_info ni; /* in-memory node information */ 63 63 }; 64 64 65 - #define nat_get_nid(nat) (nat->ni.nid) 66 - #define nat_set_nid(nat, n) (nat->ni.nid = n) 67 - #define nat_get_blkaddr(nat) (nat->ni.blk_addr) 68 - #define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) 69 - #define nat_get_ino(nat) (nat->ni.ino) 70 - #define nat_set_ino(nat, i) (nat->ni.ino = i) 71 - #define nat_get_version(nat) (nat->ni.version) 72 - #define nat_set_version(nat, v) (nat->ni.version = v) 65 + #define nat_get_nid(nat) ((nat)->ni.nid) 66 + #define nat_set_nid(nat, n) ((nat)->ni.nid = (n)) 67 + #define nat_get_blkaddr(nat) ((nat)->ni.blk_addr) 68 + #define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b)) 69 + #define nat_get_ino(nat) ((nat)->ni.ino) 70 + #define nat_set_ino(nat, i) ((nat)->ni.ino = (i)) 71 + #define nat_get_version(nat) ((nat)->ni.version) 72 + #define nat_set_version(nat, v) ((nat)->ni.version = (v)) 73 73 74 - #define inc_node_version(version) (++version) 74 + #define inc_node_version(version) (++(version)) 75 75 76 76 static inline void copy_node_info(struct node_info *dst, 77 77 struct node_info *src) ··· 200 200 struct f2fs_nm_info *nm_i = NM_I(sbi); 201 201 pgoff_t block_off; 202 202 pgoff_t block_addr; 203 - int seg_off; 204 203 204 + /* 205 + * block_off = segment_off * 512 + off_in_segment 206 + * OLD = (segment_off * 512) * 2 + off_in_segment 207 + * NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment 208 + */ 205 209 block_off = NAT_BLOCK_OFFSET(start); 206 - seg_off = block_off >> sbi->log_blocks_per_seg; 207 210 208 211 block_addr = (pgoff_t)(nm_i->nat_blkaddr + 209 - (seg_off << sbi->log_blocks_per_seg << 1) + 212 + (block_off << 1) - 210 213 (block_off & (sbi->blocks_per_seg - 1))); 211 214 212 215 if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
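The current_nat_addr() simplification relies on the arithmetic identity the new comment sketches: with block_off = seg_off * blocks_per_seg + off, the old (seg_off << log_blocks_per_seg << 1) + off equals 2*block_off - off, i.e. (block_off << 1) - (block_off & (blocks_per_seg - 1)). A quick exhaustive check of the identity for one segment geometry:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	const unsigned log_bps = 9;		/* 512 blocks per segment */
	const unsigned long bps = 1UL << log_bps;

	for (unsigned long block_off = 0; block_off < 8 * bps; block_off++) {
		unsigned long seg_off = block_off >> log_bps;
		unsigned long in_seg  = block_off & (bps - 1);

		unsigned long old = (seg_off << log_bps << 1) + in_seg;
		unsigned long new = (block_off << 1) - in_seg;

		/* 2*seg_off*bps + off == 2*block_off - off */
		assert(old == new);
	}
	puts("old and new current_nat_addr() formulas agree");
	return 0;
}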
+5 -3
fs/f2fs/recovery.c
··· 198 198 ino_of_node(page), name); 199 199 } 200 200 201 - static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 201 + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, 202 + bool check_only) 202 203 { 203 204 struct curseg_info *curseg; 204 205 struct page *page = NULL; ··· 226 225 227 226 entry = get_fsync_inode(head, ino_of_node(page)); 228 227 if (!entry) { 229 - if (IS_INODE(page) && is_dent_dnode(page)) { 228 + if (!check_only && 229 + IS_INODE(page) && is_dent_dnode(page)) { 230 230 err = recover_inode_page(sbi, page); 231 231 if (err) 232 232 break; ··· 571 569 mutex_lock(&sbi->cp_mutex); 572 570 573 571 /* step #1: find fsynced inode numbers */ 574 - err = find_fsync_dnodes(sbi, &inode_list); 572 + err = find_fsync_dnodes(sbi, &inode_list, check_only); 575 573 if (err || list_empty(&inode_list)) 576 574 goto out; 577 575
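find_fsync_dnodes() gains a check_only flag so the mount path can probe whether roll-forward recovery has any work to do without the side effect of recover_inode_page(). A compact sketch of that dry-run shape (the log array, struct log_rec, and helpers are invented for the example):

#include <stdbool.h>
#include <stdio.h>

struct log_rec { int ino; bool is_inode_dent; };

static int recovered;

static void recover_inode(int ino)	/* mutating step: real runs only */
{
	printf("recovering inode %d\n", ino);
	recovered++;
}

static int find_fsync_dnodes(const struct log_rec *log, int n, bool check_only)
{
	int found = 0;

	for (int i = 0; i < n; i++) {
		if (!check_only && log[i].is_inode_dent)
			recover_inode(log[i].ino);
		found++;
	}
	return found;	/* > 0 means recovery would have work to do */
}

int main(void)
{
	struct log_rec log[] = { { 3, true }, { 4, false } };

	/* probe at mount time: nothing is modified */
	printf("need recovery: %d\n", find_fsync_dnodes(log, 2, true) > 0);
	printf("recovered so far: %d\n", recovered);	/* still 0 */

	find_fsync_dnodes(log, 2, false);	/* the real pass */
	return 0;
}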
+569 -223
fs/f2fs/segment.c
··· 250 250 stat_dec_atomic_write(inode); 251 251 } 252 252 253 + void drop_inmem_page(struct inode *inode, struct page *page) 254 + { 255 + struct f2fs_inode_info *fi = F2FS_I(inode); 256 + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 257 + struct list_head *head = &fi->inmem_pages; 258 + struct inmem_pages *cur = NULL; 259 + 260 + f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); 261 + 262 + mutex_lock(&fi->inmem_lock); 263 + list_for_each_entry(cur, head, list) { 264 + if (cur->page == page) 265 + break; 266 + } 267 + 268 + f2fs_bug_on(sbi, !cur || cur->page != page); 269 + list_del(&cur->list); 270 + mutex_unlock(&fi->inmem_lock); 271 + 272 + dec_page_count(sbi, F2FS_INMEM_PAGES); 273 + kmem_cache_free(inmem_entry_slab, cur); 274 + 275 + ClearPageUptodate(page); 276 + set_page_private(page, 0); 277 + ClearPagePrivate(page); 278 + f2fs_put_page(page, 0); 279 + 280 + trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); 281 + } 282 + 253 283 static int __commit_inmem_pages(struct inode *inode, 254 284 struct list_head *revoke_list) 255 285 { ··· 291 261 .type = DATA, 292 262 .op = REQ_OP_WRITE, 293 263 .op_flags = REQ_SYNC | REQ_PRIO, 294 - .encrypted_page = NULL, 295 264 }; 296 265 pgoff_t last_idx = ULONG_MAX; 297 266 int err = 0; ··· 310 281 } 311 282 312 283 fio.page = page; 284 + fio.old_blkaddr = NULL_ADDR; 285 + fio.encrypted_page = NULL; 286 + fio.need_lock = false, 313 287 err = do_write_data_page(&fio); 314 288 if (err) { 315 289 unlock_page(page); ··· 390 358 } 391 359 #endif 392 360 393 - if (!need) 394 - return; 395 - 396 361 /* balance_fs_bg is able to be pending */ 397 - if (excess_cached_nats(sbi)) 362 + if (need && excess_cached_nats(sbi)) 398 363 f2fs_balance_fs_bg(sbi); 399 364 400 365 /* ··· 400 371 */ 401 372 if (has_not_enough_free_secs(sbi, 0, 0)) { 402 373 mutex_lock(&sbi->gc_mutex); 403 - f2fs_gc(sbi, false, false); 374 + f2fs_gc(sbi, false, false, NULL_SEGNO); 404 375 } 405 376 } 406 377 ··· 419 390 else 420 391 build_free_nids(sbi, false, false); 421 392 422 - if (!is_idle(sbi)) 393 + if (!is_idle(sbi) && !excess_dirty_nats(sbi)) 423 394 return; 424 395 425 396 /* checkpoint is the only way to shrink partial cached entries */ ··· 440 411 } 441 412 } 442 413 443 - static int __submit_flush_wait(struct block_device *bdev) 414 + static int __submit_flush_wait(struct f2fs_sb_info *sbi, 415 + struct block_device *bdev) 444 416 { 445 417 struct bio *bio = f2fs_bio_alloc(0); 446 418 int ret; 447 419 448 - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; 420 + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; 449 421 bio->bi_bdev = bdev; 450 422 ret = submit_bio_wait(bio); 451 423 bio_put(bio); 424 + 425 + trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER), 426 + test_opt(sbi, FLUSH_MERGE), ret); 452 427 return ret; 453 428 } 454 429 455 430 static int submit_flush_wait(struct f2fs_sb_info *sbi) 456 431 { 457 - int ret = __submit_flush_wait(sbi->sb->s_bdev); 432 + int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); 458 433 int i; 459 434 460 - if (sbi->s_ndevs && !ret) { 461 - for (i = 1; i < sbi->s_ndevs; i++) { 462 - trace_f2fs_issue_flush(FDEV(i).bdev, 463 - test_opt(sbi, NOBARRIER), 464 - test_opt(sbi, FLUSH_MERGE)); 465 - ret = __submit_flush_wait(FDEV(i).bdev); 466 - if (ret) 467 - break; 468 - } 435 + if (!sbi->s_ndevs || ret) 436 + return ret; 437 + 438 + for (i = 1; i < sbi->s_ndevs; i++) { 439 + ret = __submit_flush_wait(sbi, FDEV(i).bdev); 440 + if (ret) 441 + break; 469 442 } 470 443 return ret; 471 444 } ··· 489 458 fcc->dispatch_list = 
llist_reverse_order(fcc->dispatch_list); 490 459 491 460 ret = submit_flush_wait(sbi); 461 + atomic_inc(&fcc->issued_flush); 462 + 492 463 llist_for_each_entry_safe(cmd, next, 493 464 fcc->dispatch_list, llnode) { 494 465 cmd->ret = ret; ··· 508 475 { 509 476 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; 510 477 struct flush_cmd cmd; 478 + int ret; 511 479 512 480 if (test_opt(sbi, NOBARRIER)) 513 481 return 0; 514 482 515 - if (!test_opt(sbi, FLUSH_MERGE)) 516 - return submit_flush_wait(sbi); 517 - 518 - if (!atomic_read(&fcc->submit_flush)) { 519 - int ret; 520 - 521 - atomic_inc(&fcc->submit_flush); 483 + if (!test_opt(sbi, FLUSH_MERGE)) { 522 484 ret = submit_flush_wait(sbi); 523 - atomic_dec(&fcc->submit_flush); 485 + atomic_inc(&fcc->issued_flush); 486 + return ret; 487 + } 488 + 489 + if (!atomic_read(&fcc->issing_flush)) { 490 + atomic_inc(&fcc->issing_flush); 491 + ret = submit_flush_wait(sbi); 492 + atomic_dec(&fcc->issing_flush); 493 + 494 + atomic_inc(&fcc->issued_flush); 524 495 return ret; 525 496 } 526 497 527 498 init_completion(&cmd.wait); 528 499 529 - atomic_inc(&fcc->submit_flush); 500 + atomic_inc(&fcc->issing_flush); 530 501 llist_add(&cmd.llnode, &fcc->issue_list); 531 502 532 503 if (!fcc->dispatch_list) ··· 538 501 539 502 if (fcc->f2fs_issue_flush) { 540 503 wait_for_completion(&cmd.wait); 541 - atomic_dec(&fcc->submit_flush); 504 + atomic_dec(&fcc->issing_flush); 542 505 } else { 543 506 llist_del_all(&fcc->issue_list); 544 - atomic_set(&fcc->submit_flush, 0); 507 + atomic_set(&fcc->issing_flush, 0); 545 508 } 546 509 547 510 return cmd.ret; ··· 561 524 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); 562 525 if (!fcc) 563 526 return -ENOMEM; 564 - atomic_set(&fcc->submit_flush, 0); 527 + atomic_set(&fcc->issued_flush, 0); 528 + atomic_set(&fcc->issing_flush, 0); 565 529 init_waitqueue_head(&fcc->flush_wait_queue); 566 530 init_llist_head(&fcc->issue_list); 567 531 SM_I(sbi)->fcc_info = fcc; ··· 635 597 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) 636 598 dirty_i->nr_dirty[t]--; 637 599 638 - if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) 639 - clear_bit(GET_SECNO(sbi, segno), 600 + if (get_valid_blocks(sbi, segno, true) == 0) 601 + clear_bit(GET_SEC_FROM_SEG(sbi, segno), 640 602 dirty_i->victim_secmap); 641 603 } 642 604 } ··· 656 618 657 619 mutex_lock(&dirty_i->seglist_lock); 658 620 659 - valid_blocks = get_valid_blocks(sbi, segno, 0); 621 + valid_blocks = get_valid_blocks(sbi, segno, false); 660 622 661 623 if (valid_blocks == 0) { 662 624 __locate_dirty_segment(sbi, segno, PRE); ··· 671 633 mutex_unlock(&dirty_i->seglist_lock); 672 634 } 673 635 674 - static void __add_discard_cmd(struct f2fs_sb_info *sbi, 675 - struct bio *bio, block_t lstart, block_t len) 636 + static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, 637 + struct block_device *bdev, block_t lstart, 638 + block_t start, block_t len) 676 639 { 677 640 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 678 - struct list_head *cmd_list = &(dcc->discard_cmd_list); 641 + struct list_head *pend_list; 679 642 struct discard_cmd *dc; 643 + 644 + f2fs_bug_on(sbi, !len); 645 + 646 + pend_list = &dcc->pend_list[plist_idx(len)]; 680 647 681 648 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); 682 649 INIT_LIST_HEAD(&dc->list); 683 - dc->bio = bio; 684 - bio->bi_private = dc; 650 + dc->bdev = bdev; 685 651 dc->lstart = lstart; 652 + dc->start = start; 686 653 dc->len = len; 654 + dc->ref = 0; 687 655 dc->state = D_PREP; 656 + 
dc->error = 0; 688 657 init_completion(&dc->wait); 658 + list_add_tail(&dc->list, pend_list); 659 + atomic_inc(&dcc->discard_cmd_cnt); 660 + dcc->undiscard_blks += len; 661 + 662 + return dc; 663 + } 664 + 665 + static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, 666 + struct block_device *bdev, block_t lstart, 667 + block_t start, block_t len, 668 + struct rb_node *parent, struct rb_node **p) 669 + { 670 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 671 + struct discard_cmd *dc; 672 + 673 + dc = __create_discard_cmd(sbi, bdev, lstart, start, len); 674 + 675 + rb_link_node(&dc->rb_node, parent, p); 676 + rb_insert_color(&dc->rb_node, &dcc->root); 677 + 678 + return dc; 679 + } 680 + 681 + static void __detach_discard_cmd(struct discard_cmd_control *dcc, 682 + struct discard_cmd *dc) 683 + { 684 + if (dc->state == D_DONE) 685 + atomic_dec(&dcc->issing_discard); 686 + 687 + list_del(&dc->list); 688 + rb_erase(&dc->rb_node, &dcc->root); 689 + dcc->undiscard_blks -= dc->len; 690 + 691 + kmem_cache_free(discard_cmd_slab, dc); 692 + 693 + atomic_dec(&dcc->discard_cmd_cnt); 694 + } 695 + 696 + static void __remove_discard_cmd(struct f2fs_sb_info *sbi, 697 + struct discard_cmd *dc) 698 + { 699 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 700 + 701 + if (dc->error == -EOPNOTSUPP) 702 + dc->error = 0; 703 + 704 + if (dc->error) 705 + f2fs_msg(sbi->sb, KERN_INFO, 706 + "Issue discard failed, ret: %d", dc->error); 707 + __detach_discard_cmd(dcc, dc); 708 + } 709 + 710 + static void f2fs_submit_discard_endio(struct bio *bio) 711 + { 712 + struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; 713 + 714 + dc->error = bio->bi_error; 715 + dc->state = D_DONE; 716 + complete(&dc->wait); 717 + bio_put(bio); 718 + } 719 + 720 + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 721 + static void __submit_discard_cmd(struct f2fs_sb_info *sbi, 722 + struct discard_cmd *dc) 723 + { 724 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 725 + struct bio *bio = NULL; 726 + 727 + if (dc->state != D_PREP) 728 + return; 729 + 730 + trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); 731 + 732 + dc->error = __blkdev_issue_discard(dc->bdev, 733 + SECTOR_FROM_BLOCK(dc->start), 734 + SECTOR_FROM_BLOCK(dc->len), 735 + GFP_NOFS, 0, &bio); 736 + if (!dc->error) { 737 + /* should keep before submission to avoid D_DONE right away */ 738 + dc->state = D_SUBMIT; 739 + atomic_inc(&dcc->issued_discard); 740 + atomic_inc(&dcc->issing_discard); 741 + if (bio) { 742 + bio->bi_private = dc; 743 + bio->bi_end_io = f2fs_submit_discard_endio; 744 + bio->bi_opf |= REQ_SYNC; 745 + submit_bio(bio); 746 + list_move_tail(&dc->list, &dcc->wait_list); 747 + } 748 + } else { 749 + __remove_discard_cmd(sbi, dc); 750 + } 751 + } 752 + 753 + static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, 754 + struct block_device *bdev, block_t lstart, 755 + block_t start, block_t len, 756 + struct rb_node **insert_p, 757 + struct rb_node *insert_parent) 758 + { 759 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 760 + struct rb_node **p = &dcc->root.rb_node; 761 + struct rb_node *parent = NULL; 762 + struct discard_cmd *dc = NULL; 763 + 764 + if (insert_p && insert_parent) { 765 + parent = insert_parent; 766 + p = insert_p; 767 + goto do_insert; 768 + } 769 + 770 + p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); 771 + do_insert: 772 + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); 773 + if (!dc) 
774 + return NULL; 775 + 776 + return dc; 777 + } 778 + 779 + static void __relocate_discard_cmd(struct discard_cmd_control *dcc, 780 + struct discard_cmd *dc) 781 + { 782 + list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]); 783 + } 784 + 785 + static void __punch_discard_cmd(struct f2fs_sb_info *sbi, 786 + struct discard_cmd *dc, block_t blkaddr) 787 + { 788 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 789 + struct discard_info di = dc->di; 790 + bool modified = false; 791 + 792 + if (dc->state == D_DONE || dc->len == 1) { 793 + __remove_discard_cmd(sbi, dc); 794 + return; 795 + } 796 + 797 + dcc->undiscard_blks -= di.len; 798 + 799 + if (blkaddr > di.lstart) { 800 + dc->len = blkaddr - dc->lstart; 801 + dcc->undiscard_blks += dc->len; 802 + __relocate_discard_cmd(dcc, dc); 803 + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); 804 + modified = true; 805 + } 806 + 807 + if (blkaddr < di.lstart + di.len - 1) { 808 + if (modified) { 809 + __insert_discard_tree(sbi, dc->bdev, blkaddr + 1, 810 + di.start + blkaddr + 1 - di.lstart, 811 + di.lstart + di.len - 1 - blkaddr, 812 + NULL, NULL); 813 + f2fs_bug_on(sbi, 814 + !__check_rb_tree_consistence(sbi, &dcc->root)); 815 + } else { 816 + dc->lstart++; 817 + dc->len--; 818 + dc->start++; 819 + dcc->undiscard_blks += dc->len; 820 + __relocate_discard_cmd(dcc, dc); 821 + f2fs_bug_on(sbi, 822 + !__check_rb_tree_consistence(sbi, &dcc->root)); 823 + } 824 + } 825 + } 826 + 827 + static void __update_discard_tree_range(struct f2fs_sb_info *sbi, 828 + struct block_device *bdev, block_t lstart, 829 + block_t start, block_t len) 830 + { 831 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 832 + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; 833 + struct discard_cmd *dc; 834 + struct discard_info di = {0}; 835 + struct rb_node **insert_p = NULL, *insert_parent = NULL; 836 + block_t end = lstart + len; 689 837 690 838 mutex_lock(&dcc->cmd_lock); 691 - list_add_tail(&dc->list, cmd_list); 839 + 840 + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, 841 + NULL, lstart, 842 + (struct rb_entry **)&prev_dc, 843 + (struct rb_entry **)&next_dc, 844 + &insert_p, &insert_parent, true); 845 + if (dc) 846 + prev_dc = dc; 847 + 848 + if (!prev_dc) { 849 + di.lstart = lstart; 850 + di.len = next_dc ? 
next_dc->lstart - lstart : len; 851 + di.len = min(di.len, len); 852 + di.start = start; 853 + } 854 + 855 + while (1) { 856 + struct rb_node *node; 857 + bool merged = false; 858 + struct discard_cmd *tdc = NULL; 859 + 860 + if (prev_dc) { 861 + di.lstart = prev_dc->lstart + prev_dc->len; 862 + if (di.lstart < lstart) 863 + di.lstart = lstart; 864 + if (di.lstart >= end) 865 + break; 866 + 867 + if (!next_dc || next_dc->lstart > end) 868 + di.len = end - di.lstart; 869 + else 870 + di.len = next_dc->lstart - di.lstart; 871 + di.start = start + di.lstart - lstart; 872 + } 873 + 874 + if (!di.len) 875 + goto next; 876 + 877 + if (prev_dc && prev_dc->state == D_PREP && 878 + prev_dc->bdev == bdev && 879 + __is_discard_back_mergeable(&di, &prev_dc->di)) { 880 + prev_dc->di.len += di.len; 881 + dcc->undiscard_blks += di.len; 882 + __relocate_discard_cmd(dcc, prev_dc); 883 + f2fs_bug_on(sbi, 884 + !__check_rb_tree_consistence(sbi, &dcc->root)); 885 + di = prev_dc->di; 886 + tdc = prev_dc; 887 + merged = true; 888 + } 889 + 890 + if (next_dc && next_dc->state == D_PREP && 891 + next_dc->bdev == bdev && 892 + __is_discard_front_mergeable(&di, &next_dc->di)) { 893 + next_dc->di.lstart = di.lstart; 894 + next_dc->di.len += di.len; 895 + next_dc->di.start = di.start; 896 + dcc->undiscard_blks += di.len; 897 + __relocate_discard_cmd(dcc, next_dc); 898 + if (tdc) 899 + __remove_discard_cmd(sbi, tdc); 900 + f2fs_bug_on(sbi, 901 + !__check_rb_tree_consistence(sbi, &dcc->root)); 902 + merged = true; 903 + } 904 + 905 + if (!merged) { 906 + __insert_discard_tree(sbi, bdev, di.lstart, di.start, 907 + di.len, NULL, NULL); 908 + f2fs_bug_on(sbi, 909 + !__check_rb_tree_consistence(sbi, &dcc->root)); 910 + } 911 + next: 912 + prev_dc = next_dc; 913 + if (!prev_dc) 914 + break; 915 + 916 + node = rb_next(&prev_dc->rb_node); 917 + next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); 918 + } 919 + 692 920 mutex_unlock(&dcc->cmd_lock); 693 921 } 694 922 695 - static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) 923 + static int __queue_discard_cmd(struct f2fs_sb_info *sbi, 924 + struct block_device *bdev, block_t blkstart, block_t blklen) 696 925 { 697 - int err = dc->bio->bi_error; 926 + block_t lblkstart = blkstart; 698 927 699 - if (dc->state == D_DONE) 700 - atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); 928 + trace_f2fs_queue_discard(bdev, blkstart, blklen); 701 929 702 - if (err == -EOPNOTSUPP) 703 - err = 0; 930 + if (sbi->s_ndevs) { 931 + int devi = f2fs_target_device_index(sbi, blkstart); 704 932 705 - if (err) 706 - f2fs_msg(sbi->sb, KERN_INFO, 707 - "Issue discard failed, ret: %d", err); 708 - bio_put(dc->bio); 709 - list_del(&dc->list); 710 - kmem_cache_free(discard_cmd_slab, dc); 933 + blkstart -= FDEV(devi).start_blk; 934 + } 935 + __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); 936 + return 0; 711 937 } 712 938 713 - /* This should be covered by global mutex, &sit_i->sentry_lock */ 714 - void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) 939 + static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) 715 940 { 716 941 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 717 - struct list_head *wait_list = &(dcc->discard_cmd_list); 942 + struct list_head *pend_list; 718 943 struct discard_cmd *dc, *tmp; 719 944 struct blk_plug plug; 945 + int i, iter = 0; 720 946 721 947 mutex_lock(&dcc->cmd_lock); 722 - 723 948 blk_start_plug(&plug); 949 + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { 950 + pend_list = 
&dcc->pend_list[i]; 951 + list_for_each_entry_safe(dc, tmp, pend_list, list) { 952 + f2fs_bug_on(sbi, dc->state != D_PREP); 724 953 725 - list_for_each_entry_safe(dc, tmp, wait_list, list) { 726 - 727 - if (blkaddr == NULL_ADDR) { 728 - if (dc->state == D_PREP) { 729 - dc->state = D_SUBMIT; 730 - submit_bio(dc->bio); 731 - atomic_inc(&dcc->submit_discard); 732 - } 733 - continue; 734 - } 735 - 736 - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { 737 - if (dc->state == D_SUBMIT) 738 - wait_for_completion_io(&dc->wait); 739 - else 740 - __remove_discard_cmd(sbi, dc); 954 + if (!issue_cond || is_idle(sbi)) 955 + __submit_discard_cmd(sbi, dc); 956 + if (issue_cond && iter++ > DISCARD_ISSUE_RATE) 957 + goto out; 741 958 } 742 959 } 960 + out: 743 961 blk_finish_plug(&plug); 962 + mutex_unlock(&dcc->cmd_lock); 963 + } 744 964 745 - /* this comes from f2fs_put_super */ 746 - if (blkaddr == NULL_ADDR) { 747 - list_for_each_entry_safe(dc, tmp, wait_list, list) { 965 + static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) 966 + { 967 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 968 + struct list_head *wait_list = &(dcc->wait_list); 969 + struct discard_cmd *dc, *tmp; 970 + 971 + mutex_lock(&dcc->cmd_lock); 972 + list_for_each_entry_safe(dc, tmp, wait_list, list) { 973 + if (!wait_cond || dc->state == D_DONE) { 974 + if (dc->ref) 975 + continue; 748 976 wait_for_completion_io(&dc->wait); 749 977 __remove_discard_cmd(sbi, dc); 750 978 } ··· 1018 714 mutex_unlock(&dcc->cmd_lock); 1019 715 } 1020 716 1021 - static void f2fs_submit_discard_endio(struct bio *bio) 717 + /* This should be covered by global mutex, &sit_i->sentry_lock */ 718 + void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) 1022 719 { 1023 - struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; 720 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 721 + struct discard_cmd *dc; 722 + bool need_wait = false; 1024 723 1025 - complete(&dc->wait); 1026 - dc->state = D_DONE; 724 + mutex_lock(&dcc->cmd_lock); 725 + dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); 726 + if (dc) { 727 + if (dc->state == D_PREP) { 728 + __punch_discard_cmd(sbi, dc, blkaddr); 729 + } else { 730 + dc->ref++; 731 + need_wait = true; 732 + } 733 + } 734 + mutex_unlock(&dcc->cmd_lock); 735 + 736 + if (need_wait) { 737 + wait_for_completion_io(&dc->wait); 738 + mutex_lock(&dcc->cmd_lock); 739 + f2fs_bug_on(sbi, dc->state != D_DONE); 740 + dc->ref--; 741 + if (!dc->ref) 742 + __remove_discard_cmd(sbi, dc); 743 + mutex_unlock(&dcc->cmd_lock); 744 + } 745 + } 746 + 747 + /* This comes from f2fs_put_super */ 748 + void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) 749 + { 750 + __issue_discard_cmd(sbi, false); 751 + __wait_discard_cmd(sbi, false); 1027 752 } 1028 753 1029 754 static int issue_discard_thread(void *data) ··· 1060 727 struct f2fs_sb_info *sbi = data; 1061 728 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1062 729 wait_queue_head_t *q = &dcc->discard_wait_queue; 1063 - struct list_head *cmd_list = &dcc->discard_cmd_list; 1064 - struct discard_cmd *dc, *tmp; 1065 - struct blk_plug plug; 1066 - int iter = 0; 1067 730 repeat: 1068 731 if (kthread_should_stop()) 1069 732 return 0; 1070 733 1071 - blk_start_plug(&plug); 734 + __issue_discard_cmd(sbi, true); 735 + __wait_discard_cmd(sbi, true); 1072 736 1073 - mutex_lock(&dcc->cmd_lock); 1074 - list_for_each_entry_safe(dc, tmp, cmd_list, list) { 1075 - if (dc->state == D_PREP) { 1076 - dc->state = 
D_SUBMIT; 1077 - submit_bio(dc->bio); 1078 - atomic_inc(&dcc->submit_discard); 1079 - if (iter++ > DISCARD_ISSUE_RATE) 1080 - break; 1081 - } else if (dc->state == D_DONE) { 1082 - __remove_discard_cmd(sbi, dc); 1083 - } 1084 - } 1085 - mutex_unlock(&dcc->cmd_lock); 1086 - 1087 - blk_finish_plug(&plug); 1088 - 1089 - iter = 0; 1090 737 congestion_wait(BLK_RW_SYNC, HZ/50); 1091 738 1092 - wait_event_interruptible(*q, 1093 - kthread_should_stop() || !list_empty(&dcc->discard_cmd_list)); 739 + wait_event_interruptible(*q, kthread_should_stop() || 740 + atomic_read(&dcc->discard_cmd_cnt)); 1094 741 goto repeat; 1095 - } 1096 - 1097 - 1098 - /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 1099 - static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, 1100 - struct block_device *bdev, block_t blkstart, block_t blklen) 1101 - { 1102 - struct bio *bio = NULL; 1103 - block_t lblkstart = blkstart; 1104 - int err; 1105 - 1106 - trace_f2fs_issue_discard(bdev, blkstart, blklen); 1107 - 1108 - if (sbi->s_ndevs) { 1109 - int devi = f2fs_target_device_index(sbi, blkstart); 1110 - 1111 - blkstart -= FDEV(devi).start_blk; 1112 - } 1113 - err = __blkdev_issue_discard(bdev, 1114 - SECTOR_FROM_BLOCK(blkstart), 1115 - SECTOR_FROM_BLOCK(blklen), 1116 - GFP_NOFS, 0, &bio); 1117 - if (!err && bio) { 1118 - bio->bi_end_io = f2fs_submit_discard_endio; 1119 - bio->bi_opf |= REQ_SYNC; 1120 - 1121 - __add_discard_cmd(sbi, bio, lblkstart, blklen); 1122 - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); 1123 - } 1124 - return err; 1125 742 } 1126 743 1127 744 #ifdef CONFIG_BLK_DEV_ZONED ··· 1079 796 struct block_device *bdev, block_t blkstart, block_t blklen) 1080 797 { 1081 798 sector_t sector, nr_sects; 799 + block_t lblkstart = blkstart; 1082 800 int devi = 0; 1083 801 1084 802 if (sbi->s_ndevs) { ··· 1097 813 case BLK_ZONE_TYPE_CONVENTIONAL: 1098 814 if (!blk_queue_discard(bdev_get_queue(bdev))) 1099 815 return 0; 1100 - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); 816 + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); 1101 817 case BLK_ZONE_TYPE_SEQWRITE_REQ: 1102 818 case BLK_ZONE_TYPE_SEQWRITE_PREF: 1103 819 sector = SECTOR_FROM_BLOCK(blkstart); ··· 1129 845 bdev_zoned_model(bdev) != BLK_ZONED_NONE) 1130 846 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); 1131 847 #endif 1132 - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); 848 + return __queue_discard_cmd(sbi, bdev, blkstart, blklen); 1133 849 } 1134 850 1135 851 static int f2fs_issue_discard(struct f2fs_sb_info *sbi, ··· 1172 888 return err; 1173 889 } 1174 890 1175 - static void __add_discard_entry(struct f2fs_sb_info *sbi, 1176 - struct cp_control *cpc, struct seg_entry *se, 1177 - unsigned int start, unsigned int end) 1178 - { 1179 - struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; 1180 - struct discard_entry *new, *last; 1181 - 1182 - if (!list_empty(head)) { 1183 - last = list_last_entry(head, struct discard_entry, list); 1184 - if (START_BLOCK(sbi, cpc->trim_start) + start == 1185 - last->blkaddr + last->len && 1186 - last->len < MAX_DISCARD_BLOCKS(sbi)) { 1187 - last->len += end - start; 1188 - goto done; 1189 - } 1190 - } 1191 - 1192 - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); 1193 - INIT_LIST_HEAD(&new->list); 1194 - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; 1195 - new->len = end - start; 1196 - list_add_tail(&new->list, head); 1197 - done: 1198 - SM_I(sbi)->dcc_info->nr_discards += end - start; 1199 - } 
1200 - 1201 891 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, 1202 892 bool check_only) 1203 893 { ··· 1183 925 unsigned long *discard_map = (unsigned long *)se->discard_map; 1184 926 unsigned long *dmap = SIT_I(sbi)->tmp_map; 1185 927 unsigned int start = 0, end = -1; 1186 - bool force = (cpc->reason == CP_DISCARD); 928 + bool force = (cpc->reason & CP_DISCARD); 929 + struct discard_entry *de = NULL; 930 + struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; 1187 931 int i; 1188 932 1189 933 if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) ··· 1217 957 if (check_only) 1218 958 return true; 1219 959 1220 - __add_discard_entry(sbi, cpc, se, start, end); 960 + if (!de) { 961 + de = f2fs_kmem_cache_alloc(discard_entry_slab, 962 + GFP_F2FS_ZERO); 963 + de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); 964 + list_add_tail(&de->list, head); 965 + } 966 + 967 + for (i = start; i < end; i++) 968 + __set_bit_le(i, (void *)de->discard_map); 969 + 970 + SM_I(sbi)->dcc_info->nr_discards += end - start; 1221 971 } 1222 972 return false; 1223 973 } 1224 974 1225 975 void release_discard_addrs(struct f2fs_sb_info *sbi) 1226 976 { 1227 - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); 977 + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); 1228 978 struct discard_entry *entry, *this; 1229 979 1230 980 /* drop caches */ ··· 1260 990 1261 991 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) 1262 992 { 1263 - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); 993 + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); 1264 994 struct discard_entry *entry, *this; 1265 995 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1266 996 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 1267 997 unsigned int start = 0, end = -1; 1268 998 unsigned int secno, start_segno; 1269 - bool force = (cpc->reason == CP_DISCARD); 999 + bool force = (cpc->reason & CP_DISCARD); 1270 1000 1271 1001 mutex_lock(&dirty_i->seglist_lock); 1272 1002 ··· 1296 1026 continue; 1297 1027 } 1298 1028 next: 1299 - secno = GET_SECNO(sbi, start); 1300 - start_segno = secno * sbi->segs_per_sec; 1029 + secno = GET_SEC_FROM_SEG(sbi, start); 1030 + start_segno = GET_SEG_FROM_SEC(sbi, secno); 1301 1031 if (!IS_CURSEC(sbi, secno) && 1302 - !get_valid_blocks(sbi, start, sbi->segs_per_sec)) 1032 + !get_valid_blocks(sbi, start, true)) 1303 1033 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), 1304 1034 sbi->segs_per_sec << sbi->log_blocks_per_seg); 1305 1035 ··· 1313 1043 1314 1044 /* send small discards */ 1315 1045 list_for_each_entry_safe(entry, this, head, list) { 1316 - if (force && entry->len < cpc->trim_minlen) 1317 - goto skip; 1318 - f2fs_issue_discard(sbi, entry->blkaddr, entry->len); 1319 - cpc->trimmed += entry->len; 1046 + unsigned int cur_pos = 0, next_pos, len, total_len = 0; 1047 + bool is_valid = test_bit_le(0, entry->discard_map); 1048 + 1049 + find_next: 1050 + if (is_valid) { 1051 + next_pos = find_next_zero_bit_le(entry->discard_map, 1052 + sbi->blocks_per_seg, cur_pos); 1053 + len = next_pos - cur_pos; 1054 + 1055 + if (force && len < cpc->trim_minlen) 1056 + goto skip; 1057 + 1058 + f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, 1059 + len); 1060 + cpc->trimmed += len; 1061 + total_len += len; 1062 + } else { 1063 + next_pos = find_next_bit_le(entry->discard_map, 1064 + sbi->blocks_per_seg, cur_pos); 1065 + } 1320 1066 skip: 1067 + cur_pos = next_pos; 1068 + is_valid = 
!is_valid; 1069 + 1070 + if (cur_pos < sbi->blocks_per_seg) 1071 + goto find_next; 1072 + 1321 1073 list_del(&entry->list); 1322 - SM_I(sbi)->dcc_info->nr_discards -= entry->len; 1074 + SM_I(sbi)->dcc_info->nr_discards -= total_len; 1323 1075 kmem_cache_free(discard_entry_slab, entry); 1324 1076 } 1077 + 1078 + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); 1325 1079 } 1326 1080 1327 1081 static int create_discard_cmd_control(struct f2fs_sb_info *sbi) 1328 1082 { 1329 1083 dev_t dev = sbi->sb->s_bdev->bd_dev; 1330 1084 struct discard_cmd_control *dcc; 1331 - int err = 0; 1085 + int err = 0, i; 1332 1086 1333 1087 if (SM_I(sbi)->dcc_info) { 1334 1088 dcc = SM_I(sbi)->dcc_info; ··· 1363 1069 if (!dcc) 1364 1070 return -ENOMEM; 1365 1071 1366 - INIT_LIST_HEAD(&dcc->discard_entry_list); 1367 - INIT_LIST_HEAD(&dcc->discard_cmd_list); 1072 + INIT_LIST_HEAD(&dcc->entry_list); 1073 + for (i = 0; i < MAX_PLIST_NUM; i++) 1074 + INIT_LIST_HEAD(&dcc->pend_list[i]); 1075 + INIT_LIST_HEAD(&dcc->wait_list); 1368 1076 mutex_init(&dcc->cmd_lock); 1369 - atomic_set(&dcc->submit_discard, 0); 1077 + atomic_set(&dcc->issued_discard, 0); 1078 + atomic_set(&dcc->issing_discard, 0); 1079 + atomic_set(&dcc->discard_cmd_cnt, 0); 1370 1080 dcc->nr_discards = 0; 1371 - dcc->max_discards = 0; 1081 + dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; 1082 + dcc->undiscard_blks = 0; 1083 + dcc->root = RB_ROOT; 1372 1084 1373 1085 init_waitqueue_head(&dcc->discard_wait_queue); 1374 1086 SM_I(sbi)->dcc_info = dcc; ··· 1391 1091 return err; 1392 1092 } 1393 1093 1394 - static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) 1094 + static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) 1395 1095 { 1396 1096 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1397 1097 1398 - if (dcc && dcc->f2fs_issue_discard) { 1098 + if (!dcc) 1099 + return; 1100 + 1101 + if (dcc->f2fs_issue_discard) { 1399 1102 struct task_struct *discard_thread = dcc->f2fs_issue_discard; 1400 1103 1401 1104 dcc->f2fs_issue_discard = NULL; 1402 1105 kthread_stop(discard_thread); 1403 1106 } 1404 - if (free) { 1405 - kfree(dcc); 1406 - SM_I(sbi)->dcc_info = NULL; 1407 - } 1107 + 1108 + kfree(dcc); 1109 + SM_I(sbi)->dcc_info = NULL; 1408 1110 } 1409 1111 1410 1112 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) ··· 1647 1345 f2fs_put_page(page, 1); 1648 1346 } 1649 1347 1348 + static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) 1349 + { 1350 + struct curseg_info *curseg = CURSEG_I(sbi, type); 1351 + unsigned int segno = curseg->segno + 1; 1352 + struct free_segmap_info *free_i = FREE_I(sbi); 1353 + 1354 + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) 1355 + return !test_bit(segno, free_i->free_segmap); 1356 + return 0; 1357 + } 1358 + 1650 1359 /* 1651 1360 * Find a new segment from the free segments bitmap to right order 1652 1361 * This function should be returned with success, otherwise BUG ··· 1668 1355 struct free_segmap_info *free_i = FREE_I(sbi); 1669 1356 unsigned int segno, secno, zoneno; 1670 1357 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; 1671 - unsigned int hint = *newseg / sbi->segs_per_sec; 1672 - unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); 1358 + unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); 1359 + unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); 1673 1360 unsigned int left_start = hint; 1674 1361 bool init = true; 1675 1362 int go_left = 0; ··· 1679 1366 1680 1367 if (!new_sec 
&& ((*newseg + 1) % sbi->segs_per_sec)) { 1681 1368 segno = find_next_zero_bit(free_i->free_segmap, 1682 - (hint + 1) * sbi->segs_per_sec, *newseg + 1); 1683 - if (segno < (hint + 1) * sbi->segs_per_sec) 1369 + GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); 1370 + if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) 1684 1371 goto got_it; 1685 1372 } 1686 1373 find_other_zone: ··· 1711 1398 secno = left_start; 1712 1399 skip_left: 1713 1400 hint = secno; 1714 - segno = secno * sbi->segs_per_sec; 1715 - zoneno = secno / sbi->secs_per_zone; 1401 + segno = GET_SEG_FROM_SEC(sbi, secno); 1402 + zoneno = GET_ZONE_FROM_SEC(sbi, secno); 1716 1403 1717 1404 /* give up on finding another zone */ 1718 1405 if (!init) ··· 1756 1443 struct summary_footer *sum_footer; 1757 1444 1758 1445 curseg->segno = curseg->next_segno; 1759 - curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); 1446 + curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); 1760 1447 curseg->next_blkoff = 0; 1761 1448 curseg->next_segno = NULL_SEGNO; 1762 1449 ··· 1767 1454 if (IS_NODESEG(type)) 1768 1455 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); 1769 1456 __set_sit_entry_type(sbi, type, curseg->segno, modified); 1457 + } 1458 + 1459 + static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) 1460 + { 1461 + /* if segs_per_sec is large than 1, we need to keep original policy. */ 1462 + if (sbi->segs_per_sec != 1) 1463 + return CURSEG_I(sbi, type)->segno; 1464 + 1465 + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) 1466 + return 0; 1467 + 1468 + if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) 1469 + return SIT_I(sbi)->last_victim[ALLOC_NEXT]; 1470 + return CURSEG_I(sbi, type)->segno; 1770 1471 } 1771 1472 1772 1473 /* ··· 1801 1474 if (test_opt(sbi, NOHEAP)) 1802 1475 dir = ALLOC_RIGHT; 1803 1476 1477 + segno = __get_next_segno(sbi, type); 1804 1478 get_new_segment(sbi, &segno, new_sec, dir); 1805 1479 curseg->next_segno = segno; 1806 1480 reset_curseg(sbi, type, 1); ··· 1877 1549 { 1878 1550 struct curseg_info *curseg = CURSEG_I(sbi, type); 1879 1551 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; 1552 + unsigned segno = NULL_SEGNO; 1880 1553 int i, cnt; 1881 1554 bool reversed = false; 1882 1555 1883 1556 /* need_SSR() already forces to do this */ 1884 - if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) 1557 + if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { 1558 + curseg->next_segno = segno; 1885 1559 return 1; 1560 + } 1886 1561 1887 1562 /* For node segments, let's do SSR more intensively */ 1888 1563 if (IS_NODESEG(type)) { ··· 1909 1578 for (; cnt-- > 0; reversed ? 
i-- : i++) { 1910 1579 if (i == type) 1911 1580 continue; 1912 - if (v_ops->get_victim(sbi, &(curseg)->next_segno, 1913 - BG_GC, i, SSR)) 1581 + if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { 1582 + curseg->next_segno = segno; 1914 1583 return 1; 1584 + } 1915 1585 } 1916 1586 return 0; 1917 1587 } ··· 1924 1592 static void allocate_segment_by_default(struct f2fs_sb_info *sbi, 1925 1593 int type, bool force) 1926 1594 { 1595 + struct curseg_info *curseg = CURSEG_I(sbi, type); 1596 + 1927 1597 if (force) 1928 1598 new_curseg(sbi, type, true); 1929 1599 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && 1930 1600 type == CURSEG_WARM_NODE) 1601 + new_curseg(sbi, type, false); 1602 + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) 1931 1603 new_curseg(sbi, type, false); 1932 1604 else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) 1933 1605 change_curseg(sbi, type, true); 1934 1606 else 1935 1607 new_curseg(sbi, type, false); 1936 1608 1937 - stat_inc_seg_type(sbi, CURSEG_I(sbi, type)); 1609 + stat_inc_seg_type(sbi, curseg); 1938 1610 } 1939 1611 1940 1612 void allocate_new_segments(struct f2fs_sb_info *sbi) ··· 2070 1734 if (p_type == DATA) { 2071 1735 struct inode *inode = page->mapping->host; 2072 1736 2073 - if (S_ISDIR(inode->i_mode)) 2074 - return CURSEG_HOT_DATA; 2075 - else if (is_cold_data(page) || file_is_cold(inode)) 1737 + if (is_cold_data(page) || file_is_cold(inode)) 2076 1738 return CURSEG_COLD_DATA; 2077 - else 2078 - return CURSEG_WARM_DATA; 1739 + if (is_inode_flag_set(inode, FI_HOT_DATA)) 1740 + return CURSEG_HOT_DATA; 1741 + return CURSEG_WARM_DATA; 2079 1742 } else { 2080 1743 if (IS_DNODE(page)) 2081 1744 return is_cold_node(page) ? CURSEG_WARM_NODE : 2082 1745 CURSEG_HOT_NODE; 2083 - else 2084 - return CURSEG_COLD_NODE; 1746 + return CURSEG_COLD_NODE; 2085 1747 } 2086 1748 } 2087 1749 ··· 2122 1788 2123 1789 stat_inc_block_count(sbi, curseg); 2124 1790 2125 - /* 2126 - * SIT information should be updated before segment allocation, 2127 - * since SSR needs latest valid block information. 2128 - */ 2129 - refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 2130 - 2131 1791 if (!__has_curseg_space(sbi, type)) 2132 1792 sit_i->s_ops->allocate_segment(sbi, type, false); 1793 + /* 1794 + * SIT information should be updated after segment allocation, 1795 + * since we need to keep dirty segments precisely under SSR. 
1796 + */ 1797 + refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 2133 1798 2134 1799 mutex_unlock(&sit_i->sentry_lock); 2135 1800 ··· 2201 1868 f2fs_update_data_blkaddr(dn, fio->new_blkaddr); 2202 1869 } 2203 1870 2204 - void rewrite_data_page(struct f2fs_io_info *fio) 1871 + int rewrite_data_page(struct f2fs_io_info *fio) 2205 1872 { 2206 1873 fio->new_blkaddr = fio->old_blkaddr; 2207 1874 stat_inc_inplace_blocks(fio->sbi); 2208 - f2fs_submit_page_mbio(fio); 1875 + return f2fs_submit_page_bio(fio); 2209 1876 } 2210 1877 2211 1878 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, ··· 2770 2437 se = get_seg_entry(sbi, segno); 2771 2438 2772 2439 /* add discard candidates */ 2773 - if (cpc->reason != CP_DISCARD) { 2440 + if (!(cpc->reason & CP_DISCARD)) { 2774 2441 cpc->trim_start = segno; 2775 2442 add_discard_addrs(sbi, cpc, false); 2776 2443 } ··· 2806 2473 f2fs_bug_on(sbi, !list_empty(head)); 2807 2474 f2fs_bug_on(sbi, sit_i->dirty_sentries); 2808 2475 out: 2809 - if (cpc->reason == CP_DISCARD) { 2476 + if (cpc->reason & CP_DISCARD) { 2810 2477 __u64 trim_start = cpc->trim_start; 2811 2478 2812 2479 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) ··· 3005 2672 3006 2673 /* build discard map only one time */ 3007 2674 if (f2fs_discard_en(sbi)) { 3008 - memcpy(se->discard_map, se->cur_valid_map, 3009 - SIT_VBLOCK_MAP_SIZE); 3010 - sbi->discard_blks += sbi->blocks_per_seg - 3011 - se->valid_blocks; 2675 + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 2676 + memset(se->discard_map, 0xff, 2677 + SIT_VBLOCK_MAP_SIZE); 2678 + } else { 2679 + memcpy(se->discard_map, 2680 + se->cur_valid_map, 2681 + SIT_VBLOCK_MAP_SIZE); 2682 + sbi->discard_blks += 2683 + sbi->blocks_per_seg - 2684 + se->valid_blocks; 2685 + } 3012 2686 } 3013 2687 3014 2688 if (sbi->segs_per_sec > 1) ··· 3039 2699 seg_info_from_raw_sit(se, &sit); 3040 2700 3041 2701 if (f2fs_discard_en(sbi)) { 3042 - memcpy(se->discard_map, se->cur_valid_map, 3043 - SIT_VBLOCK_MAP_SIZE); 3044 - sbi->discard_blks += old_valid_blocks - 3045 - se->valid_blocks; 2702 + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { 2703 + memset(se->discard_map, 0xff, 2704 + SIT_VBLOCK_MAP_SIZE); 2705 + } else { 2706 + memcpy(se->discard_map, se->cur_valid_map, 2707 + SIT_VBLOCK_MAP_SIZE); 2708 + sbi->discard_blks += old_valid_blocks - 2709 + se->valid_blocks; 2710 + } 3046 2711 } 3047 2712 3048 2713 if (sbi->segs_per_sec > 1) ··· 3091 2746 if (segno >= MAIN_SEGS(sbi)) 3092 2747 break; 3093 2748 offset = segno + 1; 3094 - valid_blocks = get_valid_blocks(sbi, segno, 0); 2749 + valid_blocks = get_valid_blocks(sbi, segno, false); 3095 2750 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) 3096 2751 continue; 3097 2752 if (valid_blocks > sbi->blocks_per_seg) { ··· 3197 2852 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; 3198 2853 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 3199 2854 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 2855 + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; 3200 2856 3201 2857 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; 3202 2858 ··· 3334 2988 if (!sm_info) 3335 2989 return; 3336 2990 destroy_flush_cmd_control(sbi, true); 3337 - destroy_discard_cmd_control(sbi, true); 2991 + destroy_discard_cmd_control(sbi); 3338 2992 destroy_dirty_segmap(sbi); 3339 2993 destroy_curseg(sbi); 3340 2994 destroy_free_segmap(sbi);
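
The rewritten clear_prefree_segments() above replaces per-extent discard_entry records with one bitmap per segment (discard_map) and walks alternating runs of set and clear bits, issuing one discard per contiguous valid run. Below is a minimal userspace sketch of that alternating-run walk, with a naive stand-in for the kernel's find_next_bit_le()/find_next_zero_bit_le(); the helper names and sizes here are illustrative, not the f2fs ones.

    #include <stdio.h>
    #include <stdbool.h>

    #define BLOCKS_PER_SEG 512  /* illustrative; f2fs uses 512 blocks per 2MB segment */

    /* naive stand-in for the kernel's find_next_bit_le()/find_next_zero_bit_le() */
    static unsigned int find_next(const unsigned char *map, unsigned int size,
                                  unsigned int pos, bool want_set)
    {
        while (pos < size && (((map[pos / 8] >> (pos % 8)) & 1) != (int)want_set))
            pos++;
        return pos;
    }

    /* walk alternating valid/invalid runs, as clear_prefree_segments() now does */
    static void walk_discard_map(const unsigned char *map)
    {
        unsigned int cur_pos = 0, next_pos;
        bool is_valid = map[0] & 1;

        while (cur_pos < BLOCKS_PER_SEG) {
            if (is_valid) {
                next_pos = find_next(map, BLOCKS_PER_SEG, cur_pos, false);
                printf("discard blocks [%u, %u)\n", cur_pos, next_pos);
            } else {
                next_pos = find_next(map, BLOCKS_PER_SEG, cur_pos, true);
            }
            cur_pos = next_pos;
            is_valid = !is_valid;   /* runs alternate by construction */
        }
    }

    int main(void)
    {
        unsigned char map[BLOCKS_PER_SEG / 8] = {0};

        map[0] = 0x0f;  /* blocks 0-3 discardable */
        map[2] = 0xff;  /* blocks 16-23 discardable */
        walk_discard_map(map);  /* prints [0, 4) and [16, 24) */
        return 0;
    }

The bitmap form lets trim_minlen filtering and run merging happen in one pass, instead of growing a list entry per extent as the removed __add_discard_entry() did.
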
+80 -59
fs/f2fs/segment.h
··· 21 21 #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ 22 22 23 23 /* L: Logical segment # in volume, R: Relative segment # in main area */ 24 - #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 25 - #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) 24 + #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) 25 + #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) 26 26 27 - #define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) 28 - #define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) 27 + #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) 28 + #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE) 29 29 30 30 #define IS_CURSEG(sbi, seg) \ 31 - ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ 32 - (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ 33 - (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ 34 - (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ 35 - (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ 36 - (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) 31 + (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ 32 + ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ 33 + ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ 34 + ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ 35 + ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ 36 + ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) 37 37 38 38 #define IS_CURSEC(sbi, secno) \ 39 - ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ 40 - sbi->segs_per_sec) || \ 41 - (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ 42 - sbi->segs_per_sec) || \ 43 - (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ 44 - sbi->segs_per_sec) || \ 45 - (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ 46 - sbi->segs_per_sec) || \ 47 - (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ 48 - sbi->segs_per_sec) || \ 49 - (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ 50 - sbi->segs_per_sec)) \ 39 + (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ 40 + (sbi)->segs_per_sec) || \ 41 + ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ 42 + (sbi)->segs_per_sec) || \ 43 + ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ 44 + (sbi)->segs_per_sec) || \ 45 + ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ 46 + (sbi)->segs_per_sec) || \ 47 + ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ 48 + (sbi)->segs_per_sec) || \ 49 + ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ 50 + (sbi)->segs_per_sec)) \ 51 51 52 52 #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) 53 53 #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) 54 54 55 55 #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) 56 - #define MAIN_SECS(sbi) (sbi->total_sections) 56 + #define MAIN_SECS(sbi) ((sbi)->total_sections) 57 57 58 58 #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) 59 - #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) 59 + #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) 60 60 61 61 #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) 62 - #define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ 63 - sbi->log_blocks_per_seg)) 62 + #define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ 63 + (sbi)->log_blocks_per_seg)) 64 64 65 65 #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ 66 - (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) 66 + (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) 67 67 68 68 #define NEXT_FREE_BLKADDR(sbi, curseg) \ 69 - 
(START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) 69 + (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) 70 70 71 71 #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) 72 72 #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 73 - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 73 + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) 74 74 #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ 75 - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) 75 + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) 76 76 77 77 #define GET_SEGNO(sbi, blk_addr) \ 78 - (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ 78 + ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ 79 79 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ 80 80 GET_SEGNO_FROM_SEG0(sbi, blk_addr))) 81 - #define GET_SECNO(sbi, segno) \ 82 - ((segno) / sbi->segs_per_sec) 83 - #define GET_ZONENO_FROM_SEGNO(sbi, segno) \ 84 - ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) 81 + #define BLKS_PER_SEC(sbi) \ 82 + ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) 83 + #define GET_SEC_FROM_SEG(sbi, segno) \ 84 + ((segno) / (sbi)->segs_per_sec) 85 + #define GET_SEG_FROM_SEC(sbi, secno) \ 86 + ((secno) * (sbi)->segs_per_sec) 87 + #define GET_ZONE_FROM_SEC(sbi, secno) \ 88 + ((secno) / (sbi)->secs_per_zone) 89 + #define GET_ZONE_FROM_SEG(sbi, segno) \ 90 + GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) 85 91 86 92 #define GET_SUM_BLOCK(sbi, segno) \ 87 - ((sbi->sm_info->ssa_blkaddr) + segno) 93 + ((sbi)->sm_info->ssa_blkaddr + (segno)) 88 94 89 95 #define GET_SUM_TYPE(footer) ((footer)->entry_type) 90 - #define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) 96 + #define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type)) 91 97 92 98 #define SIT_ENTRY_OFFSET(sit_i, segno) \ 93 - (segno % sit_i->sents_per_block) 99 + ((segno) % (sit_i)->sents_per_block) 94 100 #define SIT_BLOCK_OFFSET(segno) \ 95 - (segno / SIT_ENTRY_PER_BLOCK) 101 + ((segno) / SIT_ENTRY_PER_BLOCK) 96 102 #define START_SEGNO(segno) \ 97 103 (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) 98 104 #define SIT_BLK_CNT(sbi) \ ··· 109 103 #define SECTOR_FROM_BLOCK(blk_addr) \ 110 104 (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) 111 105 #define SECTOR_TO_BLOCK(sectors) \ 112 - (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) 106 + ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) 113 107 114 108 /* 115 109 * indicate a block allocation direction: RIGHT and LEFT. ··· 138 132 */ 139 133 enum { 140 134 GC_CB = 0, 141 - GC_GREEDY 135 + GC_GREEDY, 136 + ALLOC_NEXT, 137 + FLUSH_DEVICE, 138 + MAX_GC_POLICY, 142 139 }; 143 140 144 141 /* ··· 236 227 unsigned long long mounted_time; /* mount time */ 237 228 unsigned long long min_mtime; /* min. modification time */ 238 229 unsigned long long max_mtime; /* max. 
modification time */ 230 + 231 + unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */ 239 232 }; 240 233 241 234 struct free_segmap_info { ··· 314 303 unsigned int segno) 315 304 { 316 305 struct sit_info *sit_i = SIT_I(sbi); 317 - return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; 306 + return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)]; 318 307 } 319 308 320 309 static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, 321 - unsigned int segno, int section) 310 + unsigned int segno, bool use_section) 322 311 { 323 312 /* 324 313 * In order to get # of valid blocks in a section instantly from many 325 314 * segments, f2fs manages two counting structures separately. 326 315 */ 327 - if (section > 1) 316 + if (use_section && sbi->segs_per_sec > 1) 328 317 return get_sec_entry(sbi, segno)->valid_blocks; 329 318 else 330 319 return get_seg_entry(sbi, segno)->valid_blocks; ··· 369 358 static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) 370 359 { 371 360 struct free_segmap_info *free_i = FREE_I(sbi); 372 - unsigned int secno = segno / sbi->segs_per_sec; 373 - unsigned int start_segno = secno * sbi->segs_per_sec; 361 + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 362 + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); 374 363 unsigned int next; 375 364 376 365 spin_lock(&free_i->segmap_lock); ··· 390 379 unsigned int segno) 391 380 { 392 381 struct free_segmap_info *free_i = FREE_I(sbi); 393 - unsigned int secno = segno / sbi->segs_per_sec; 382 + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 383 + 394 384 set_bit(segno, free_i->free_segmap); 395 385 free_i->free_segments--; 396 386 if (!test_and_set_bit(secno, free_i->free_secmap)) ··· 402 390 unsigned int segno) 403 391 { 404 392 struct free_segmap_info *free_i = FREE_I(sbi); 405 - unsigned int secno = segno / sbi->segs_per_sec; 406 - unsigned int start_segno = secno * sbi->segs_per_sec; 393 + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 394 + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); 407 395 unsigned int next; 408 396 409 397 spin_lock(&free_i->segmap_lock); ··· 424 412 unsigned int segno) 425 413 { 426 414 struct free_segmap_info *free_i = FREE_I(sbi); 427 - unsigned int secno = segno / sbi->segs_per_sec; 415 + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); 416 + 428 417 spin_lock(&free_i->segmap_lock); 429 418 if (!test_and_set_bit(segno, free_i->free_segmap)) { 430 419 free_i->free_segments--; ··· 490 477 491 478 static inline int overprovision_sections(struct f2fs_sb_info *sbi) 492 479 { 493 - return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; 480 + return GET_SEC_FROM_SEG(sbi, (unsigned int)overprovision_segments(sbi)); 494 481 } 495 482 496 483 static inline int reserved_sections(struct f2fs_sb_info *sbi) 497 484 { 498 - return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; 485 + return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); 499 486 } 500 487 501 488 static inline bool need_SSR(struct f2fs_sb_info *sbi) ··· 508 495 return false; 509 496 510 497 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + 511 - reserved_sections(sbi) + 1); 498 + 2 * reserved_sections(sbi)); 512 499 } 513 500 514 501 static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, ··· 553 540 */ 554 541 #define DEF_MIN_IPU_UTIL 70 555 542 #define DEF_MIN_FSYNC_BLOCKS 8 543 + #define DEF_MIN_HOT_BLOCKS 16 556 544 557 545 enum { 558 546 F2FS_IPU_FORCE, ··· 561 547 F2FS_IPU_UTIL, 562 548 
F2FS_IPU_SSR_UTIL, 563 549 F2FS_IPU_FSYNC, 550 + F2FS_IPU_ASYNC, 564 551 }; 565 552 566 - static inline bool need_inplace_update(struct inode *inode) 553 + static inline bool need_inplace_update_policy(struct inode *inode, 554 + struct f2fs_io_info *fio) 567 555 { 568 556 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 569 557 unsigned int policy = SM_I(sbi)->ipu_policy; 570 - 571 - /* IPU can be done only for the user data */ 572 - if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) 573 - return false; 574 558 575 559 if (test_opt(sbi, LFS)) 576 560 return false; ··· 582 570 return true; 583 571 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && 584 572 utilization(sbi) > SM_I(sbi)->min_ipu_util) 573 + return true; 574 + 575 + /* 576 + * IPU for rewrite async pages 577 + */ 578 + if (policy & (0x1 << F2FS_IPU_ASYNC) && 579 + fio && fio->op == REQ_OP_WRITE && 580 + !(fio->op_flags & REQ_SYNC) && 581 + !f2fs_encrypted_inode(inode)) 585 582 return true; 586 583 587 584 /* this is only set during fdatasync */ ··· 740 719 static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, 741 720 unsigned int secno) 742 721 { 743 - if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= 722 + if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= 744 723 sbi->fggc_threshold) 745 724 return true; 746 725 return false;
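
Most of the segment.h churn above wraps macro parameters in parentheses. The hazard being fixed is classic C preprocessor precedence: once a caller passes an expression rather than a plain identifier, an unparenthesized parameter silently regroups. A small self-contained illustration (the macro names are contrived, not the f2fs ones):

    #include <stdio.h>

    #define SEG_TO_SEC_BAD(segs_per_sec, segno)   (segno / segs_per_sec)
    #define SEG_TO_SEC_GOOD(segs_per_sec, segno)  ((segno) / (segs_per_sec))

    int main(void)
    {
        int base = 100;

        /* the caller passes an expression, not a plain identifier */
        printf("bad:  %d\n", SEG_TO_SEC_BAD(4, base + 4));   /* base + (4 / 4) = 101 */
        printf("good: %d\n", SEG_TO_SEC_GOOD(4, base + 4));  /* (base + 4) / 4 = 26  */
        return 0;
    }
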
+38 -10
fs/f2fs/super.c
··· 49 49 [FAULT_BLOCK] = "no more block", 50 50 [FAULT_DIR_DEPTH] = "too big dir depth", 51 51 [FAULT_EVICT_INODE] = "evict_inode fail", 52 + [FAULT_TRUNCATE] = "truncate fail", 52 53 [FAULT_IO] = "IO error", 53 54 [FAULT_CHECKPOINT] = "checkpoint error", 54 55 }; ··· 83 82 Opt_discard, 84 83 Opt_nodiscard, 85 84 Opt_noheap, 85 + Opt_heap, 86 86 Opt_user_xattr, 87 87 Opt_nouser_xattr, 88 88 Opt_acl, ··· 118 116 {Opt_discard, "discard"}, 119 117 {Opt_nodiscard, "nodiscard"}, 120 118 {Opt_noheap, "no_heap"}, 119 + {Opt_heap, "heap"}, 121 120 {Opt_user_xattr, "user_xattr"}, 122 121 {Opt_nouser_xattr, "nouser_xattr"}, 123 122 {Opt_acl, "acl"}, ··· 296 293 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 297 294 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 298 295 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); 296 + F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); 299 297 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); 300 298 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); 301 299 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); ··· 322 318 ATTR_LIST(ipu_policy), 323 319 ATTR_LIST(min_ipu_util), 324 320 ATTR_LIST(min_fsync_blocks), 321 + ATTR_LIST(min_hot_blocks), 325 322 ATTR_LIST(max_victim_search), 326 323 ATTR_LIST(dir_level), 327 324 ATTR_LIST(ram_thresh), ··· 440 435 break; 441 436 case Opt_noheap: 442 437 set_opt(sbi, NOHEAP); 438 + break; 439 + case Opt_heap: 440 + clear_opt(sbi, NOHEAP); 443 441 break; 444 442 #ifdef CONFIG_F2FS_FS_XATTR 445 443 case Opt_user_xattr: ··· 795 787 } 796 788 797 789 /* be sure to wait for any on-going discard commands */ 798 - f2fs_wait_discard_bio(sbi, NULL_ADDR); 790 + f2fs_wait_discard_bios(sbi); 791 + 792 + if (!sbi->discard_blks) { 793 + struct cp_control cpc = { 794 + .reason = CP_UMOUNT | CP_TRIMMED, 795 + }; 796 + write_checkpoint(sbi, &cpc); 797 + } 799 798 800 799 /* write_checkpoint can update stat informaion */ 801 800 f2fs_destroy_stats(sbi); ··· 928 913 if (test_opt(sbi, DISCARD)) 929 914 seq_puts(seq, ",discard"); 930 915 if (test_opt(sbi, NOHEAP)) 931 - seq_puts(seq, ",no_heap_alloc"); 916 + seq_puts(seq, ",no_heap"); 917 + else 918 + seq_puts(seq, ",heap"); 932 919 #ifdef CONFIG_F2FS_FS_XATTR 933 920 if (test_opt(sbi, XATTR_USER)) 934 921 seq_puts(seq, ",user_xattr"); ··· 1003 986 if ((i % 10) == 0) 1004 987 seq_printf(seq, "%-10d", i); 1005 988 seq_printf(seq, "%d|%-3u", se->type, 1006 - get_valid_blocks(sbi, i, 1)); 989 + get_valid_blocks(sbi, i, false)); 1007 990 if ((i % 10) == 9 || i == (total_segs - 1)) 1008 991 seq_putc(seq, '\n'); 1009 992 else ··· 1029 1012 1030 1013 seq_printf(seq, "%-10d", i); 1031 1014 seq_printf(seq, "%d|%-3u|", se->type, 1032 - get_valid_blocks(sbi, i, 1)); 1015 + get_valid_blocks(sbi, i, false)); 1033 1016 for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) 1034 1017 seq_printf(seq, " %.2x", se->cur_valid_map[j]); 1035 1018 seq_putc(seq, '\n'); ··· 1063 1046 set_opt(sbi, INLINE_DATA); 1064 1047 set_opt(sbi, INLINE_DENTRY); 1065 1048 set_opt(sbi, EXTENT_CACHE); 1049 + set_opt(sbi, NOHEAP); 1066 1050 sbi->sb->s_flags |= MS_LAZYTIME; 1067 1051 set_opt(sbi, FLUSH_MERGE); 1068 1052 if (f2fs_sb_mounted_blkzoned(sbi->sb)) { ··· 1325 1307 unlock_buffer(bh); 1326 1308 1327 1309 /* it's rare case, we can do fua all the time */ 1328 - return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); 1310 + return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); 1329 1311 } 1330 1312 1331 1313 
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, ··· 1501 1483 return 1; 1502 1484 } 1503 1485 1486 + if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { 1487 + f2fs_msg(sb, KERN_INFO, 1488 + "Invalid segment count (%u)", 1489 + le32_to_cpu(raw_super->segment_count)); 1490 + return 1; 1491 + } 1492 + 1504 1493 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ 1505 1494 if (sanity_check_area_boundary(sbi, bh)) 1506 1495 return 1; ··· 1579 1554 1580 1555 for (i = 0; i < NR_COUNT_TYPE; i++) 1581 1556 atomic_set(&sbi->nr_pages[i], 0); 1557 + 1558 + atomic_set(&sbi->wb_sync_req, 0); 1582 1559 1583 1560 INIT_LIST_HEAD(&sbi->s_list); 1584 1561 mutex_init(&sbi->umount_mutex); ··· 1944 1917 mutex_init(&sbi->gc_mutex); 1945 1918 mutex_init(&sbi->cp_mutex); 1946 1919 init_rwsem(&sbi->node_write); 1920 + init_rwsem(&sbi->node_change); 1947 1921 1948 1922 /* disallow all the data/node/meta page writes */ 1949 1923 set_sbi_flag(sbi, SBI_POR_DOING); ··· 2050 2022 2051 2023 f2fs_join_shrinker(sbi); 2052 2024 2025 + err = f2fs_build_stats(sbi); 2026 + if (err) 2027 + goto free_nm; 2028 + 2053 2029 /* if there are nt orphan nodes free them */ 2054 2030 err = recover_orphan_inodes(sbi); 2055 2031 if (err) ··· 2077 2045 err = -ENOMEM; 2078 2046 goto free_root_inode; 2079 2047 } 2080 - 2081 - err = f2fs_build_stats(sbi); 2082 - if (err) 2083 - goto free_root_inode; 2084 2048 2085 2049 if (f2fs_proc_root) 2086 2050 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); ··· 2171 2143 remove_proc_entry("segment_bits", sbi->s_proc); 2172 2144 remove_proc_entry(sb->s_id, f2fs_proc_root); 2173 2145 } 2174 - f2fs_destroy_stats(sbi); 2175 2146 free_root_inode: 2176 2147 dput(sb->s_root); 2177 2148 sb->s_root = NULL; ··· 2188 2161 truncate_inode_pages_final(META_MAPPING(sbi)); 2189 2162 iput(sbi->node_inode); 2190 2163 mutex_unlock(&sbi->umount_mutex); 2164 + f2fs_destroy_stats(sbi); 2191 2165 free_nm: 2192 2166 destroy_node_manager(sbi); 2193 2167 free_sm:
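
The umount path above now issues a checkpoint whose reason combines flags (CP_UMOUNT | CP_TRIMMED), which is why the matching tests elsewhere in this series switch from equality to bit tests (cpc->reason & CP_DISCARD). A sketch of why the equality form breaks once reasons become a bitmask; the flag values below are illustrative, not f2fs's:

    #include <stdio.h>

    #define CP_UMOUNT   0x001
    #define CP_DISCARD  0x010
    #define CP_TRIMMED  0x020   /* illustrative values */

    static void checkpoint(unsigned int reason)
    {
        if (reason == CP_DISCARD)   /* old test: misses combined reasons */
            printf("  eq:  would trim\n");
        if (reason & CP_DISCARD)    /* new test: matches any combination */
            printf("  and: would trim\n");
    }

    int main(void)
    {
        checkpoint(CP_DISCARD);               /* both tests fire */
        checkpoint(CP_UMOUNT | CP_TRIMMED);   /* neither fires: no trim requested */
        checkpoint(CP_UMOUNT | CP_DISCARD);   /* only the bit test fires */
        return 0;
    }
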
+2 -2
fs/f2fs/trace.c
··· 59 59 pid_t pid = task_pid_nr(current); 60 60 void *p; 61 61 62 - page->private = pid; 62 + set_page_private(page, (unsigned long)pid); 63 63 64 64 if (radix_tree_preload(GFP_NOFS)) 65 65 return; ··· 138 138 139 139 radix_tree_for_each_slot(slot, &pids, &iter, first_index) { 140 140 results[ret] = iter.index; 141 - if (++ret == PIDVEC_SIZE) 141 + if (++ret == max_items) 142 142 break; 143 143 } 144 144 return ret;
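
The trace.c hunks above switch the private-field store to set_page_private() and bound the radix-tree gang copy by the caller-supplied max_items instead of the fixed PIDVEC_SIZE. The second change is a buffer-bound fix: looping to a constant larger than the destination array overruns it. A stand-alone sketch of the corrected pattern (names here are illustrative):

    #include <stdio.h>

    #define PIDVEC_SIZE 8   /* capacity of the usual caller's array (illustrative) */

    /* copy up to max_items entries into results[]; bounding the loop by
     * PIDVEC_SIZE instead would overrun any smaller destination array */
    static unsigned int gang_copy(const int *src, unsigned int nsrc,
                                  int *results, unsigned int max_items)
    {
        unsigned int ret = 0, i;

        for (i = 0; i < nsrc; i++) {
            results[ret] = src[i];
            if (++ret == max_items)
                break;
        }
        return ret;
    }

    int main(void)
    {
        int src[16], small[4];
        unsigned int i, n;

        for (i = 0; i < 16; i++)
            src[i] = (int)i;
        n = gang_copy(src, 16, small, 4);   /* stops at 4, not PIDVEC_SIZE */
        printf("copied %u entries\n", n);
        return 0;
    }
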
+15 -16
fs/f2fs/xattr.c
··· 250 250 void *cur_addr, *txattr_addr, *last_addr = NULL; 251 251 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 252 252 unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; 253 - unsigned int inline_size = 0; 253 + unsigned int inline_size = inline_xattr_size(inode); 254 254 int err = 0; 255 - 256 - inline_size = inline_xattr_size(inode); 257 255 258 256 if (!size && !inline_size) 259 257 return -ENODATA; 260 258 261 - txattr_addr = kzalloc(inline_size + size + sizeof(__u32), 259 + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, 262 260 GFP_F2FS_ZERO); 263 261 if (!txattr_addr) 264 262 return -ENOMEM; ··· 326 328 { 327 329 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 328 330 struct f2fs_xattr_header *header; 329 - size_t size = PAGE_SIZE, inline_size = 0; 331 + nid_t xnid = F2FS_I(inode)->i_xattr_nid; 332 + unsigned int size = VALID_XATTR_BLOCK_SIZE; 333 + unsigned int inline_size = inline_xattr_size(inode); 330 334 void *txattr_addr; 331 335 int err; 332 336 333 - inline_size = inline_xattr_size(inode); 334 - 335 - txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO); 337 + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, 338 + GFP_F2FS_ZERO); 336 339 if (!txattr_addr) 337 340 return -ENOMEM; 338 341 ··· 357 358 } 358 359 359 360 /* read from xattr node block */ 360 - if (F2FS_I(inode)->i_xattr_nid) { 361 + if (xnid) { 361 362 struct page *xpage; 362 363 void *xattr_addr; 363 364 364 365 /* The inode already has an extended attribute block. */ 365 - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); 366 + xpage = get_node_page(sbi, xnid); 366 367 if (IS_ERR(xpage)) { 367 368 err = PTR_ERR(xpage); 368 369 goto fail; 369 370 } 370 371 371 372 xattr_addr = page_address(xpage); 372 - memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); 373 + memcpy(txattr_addr + inline_size, xattr_addr, size); 373 374 f2fs_put_page(xpage, 1); 374 375 } 375 376 ··· 391 392 void *txattr_addr, struct page *ipage) 392 393 { 393 394 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 394 - size_t inline_size = 0; 395 + size_t inline_size = inline_xattr_size(inode); 395 396 void *xattr_addr; 396 397 struct page *xpage; 397 398 nid_t new_nid = 0; 398 399 int err; 399 - 400 - inline_size = inline_xattr_size(inode); 401 400 402 401 if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) 403 402 if (!alloc_nid(sbi, &new_nid)) ··· 451 454 } 452 455 453 456 xattr_addr = page_address(xpage); 454 - memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE); 457 + memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); 455 458 set_page_dirty(xpage); 456 459 f2fs_put_page(xpage, 1); 457 460 ··· 543 546 const void *value, size_t size) 544 547 { 545 548 void *pval = entry->e_name + entry->e_name_len; 546 - return (entry->e_value_size == size) && !memcmp(pval, value, size); 549 + 550 + return (le16_to_cpu(entry->e_value_size) == size) && 551 + !memcmp(pval, value, size); 547 552 } 548 553 549 554 static int __f2fs_setxattr(struct inode *inode, int index,
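
In f2fs_xattr_value_same() above, the size comparison gains a le16_to_cpu(): e_value_size is an on-disk little-endian field, so the raw compare happened to work only on little-endian hosts. A self-contained sketch of the failure mode on a big-endian CPU; the byte-swap helper below stands in for the kernel's le16_to_cpu() as it behaves on such a host:

    #include <stdio.h>
    #include <stdint.h>

    /* stand-in for le16_to_cpu() on a big-endian host */
    static uint16_t le16_to_cpu_be(uint16_t raw)
    {
        return (uint16_t)((raw >> 8) | (raw << 8));
    }

    int main(void)
    {
        /* value 5, stored on disk little-endian (bytes 05 00), then loaded
         * raw by a big-endian CPU: the register holds 0x0500 == 1280 */
        uint16_t e_value_size = 0x0500;
        size_t size = 5;

        printf("raw compare:  %s\n",
               e_value_size == size ? "match" : "MISMATCH (the bug)");
        printf("converted:    %s\n",
               le16_to_cpu_be(e_value_size) == size ? "match" : "MISMATCH");
        return 0;
    }
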
+4 -4
fs/f2fs/xattr.h
··· 58 58 #define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) 59 59 #define XATTR_ROUND (3) 60 60 61 - #define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) 61 + #define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND) 62 62 63 63 #define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ 64 - entry->e_name_len + le16_to_cpu(entry->e_value_size))) 64 + (entry)->e_name_len + le16_to_cpu((entry)->e_value_size))) 65 65 66 66 #define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ 67 67 ENTRY_SIZE(entry))) ··· 72 72 for (entry = XATTR_FIRST_ENTRY(addr);\ 73 73 !IS_XATTR_LAST_ENTRY(entry);\ 74 74 entry = XATTR_NEXT_ENTRY(entry)) 75 - #define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) 76 - #define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32)) 75 + #define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) 76 + #define XATTR_PADDING_SIZE (sizeof(__u32)) 77 77 #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ 78 78 VALID_XATTR_BLOCK_SIZE) 79 79
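
The header change above makes VALID_XATTR_BLOCK_SIZE the full block minus the node footer and splits the trailing __u32 out as XATTR_PADDING_SIZE, so the padding word is accounted for only where lookup buffers are allocated. A quick worked check of the resulting sizes; the 4KB page and 24-byte footer are assumptions for illustration:

    #include <stdio.h>

    int main(void)
    {
        unsigned int page_size = 4096;    /* assumed 4KB page */
        unsigned int node_footer = 24;    /* assumed sizeof(struct node_footer) */
        unsigned int padding = 4;         /* XATTR_PADDING_SIZE: sizeof(__u32) */

        unsigned int valid = page_size - node_footer;   /* VALID_XATTR_BLOCK_SIZE */

        printf("VALID_XATTR_BLOCK_SIZE = %u\n", valid);  /* 4072 */
        /* lookup buffers are sized inline_size + valid + padding, while the
         * on-disk copy in write_all_xattrs() moves only 'valid' bytes */
        printf("lookup buffer tail     = %u\n", valid + padding);
        return 0;
    }
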
+12 -5
include/linux/f2fs_fs.h
··· 32 32 /* 0, 1(node nid), 2(meta nid) are reserved node id */ 33 33 #define F2FS_RESERVED_NODE_NUM 3 34 34 35 - #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) 36 - #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) 37 - #define F2FS_META_INO(sbi) (sbi->meta_ino_num) 35 + #define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num) 36 + #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) 37 + #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) 38 38 39 39 #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ 40 40 #define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ ··· 114 114 /* 115 115 * For checkpoint 116 116 */ 117 + #define CP_TRIMMED_FLAG 0x00000100 117 118 #define CP_NAT_BITS_FLAG 0x00000080 118 119 #define CP_CRC_RECOVERY_FLAG 0x00000040 119 120 #define CP_FASTBOOT_FLAG 0x00000020 ··· 162 161 */ 163 162 #define F2FS_ORPHANS_PER_BLOCK 1020 164 163 165 - #define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \ 164 + #define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ 166 165 F2FS_ORPHANS_PER_BLOCK) 167 166 168 167 struct f2fs_orphan_block { ··· 301 300 */ 302 301 #define SIT_VBLOCK_MAP_SIZE 64 303 302 #define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) 303 + 304 + /* 305 + * F2FS uses 4 bytes to represent block address. As a result, supported size of 306 + * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. 307 + */ 308 + #define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) 304 309 305 310 /* 306 311 * Note that f2fs_sit_entry->vblocks has the following bit-field information. ··· 456 449 #define F2FS_SLOT_LEN 8 457 450 #define F2FS_SLOT_LEN_BITS 3 458 451 459 - #define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) 452 + #define GET_DENTRY_SLOTS(x) (((x) + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) 460 453 461 454 /* MAX level for dir lookup */ 462 455 #define MAX_DIR_HASH_DEPTH 63
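
The new F2FS_MAX_SEGMENT bound encodes the comment's arithmetic: 32-bit block addresses with 4KB blocks cap a volume at 16 TB, and at 2MB per segment that is (16 * 1024 * 1024) / 2 segments. A quick check of the numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t max_blocks = 1ULL << 32;   /* 32-bit block addresses */
        uint64_t block_size = 4096;         /* 4KB blocks */
        uint64_t seg_size = 2ULL << 20;     /* 2MB segments */

        uint64_t max_bytes = max_blocks * block_size;
        uint64_t max_segs = max_bytes / seg_size;

        printf("max volume   = %llu TB\n",
               (unsigned long long)(max_bytes >> 40));              /* 16 */
        printf("max segments = %llu\n", (unsigned long long)max_segs); /* 8388608 */
        printf("macro value  = %d\n", (16 * 1024 * 1024) / 2);         /* 8388608 */
        return 0;
    }
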
+41 -21
include/trace/events/f2fs.h
··· 15 15 TRACE_DEFINE_ENUM(META_FLUSH); 16 16 TRACE_DEFINE_ENUM(INMEM); 17 17 TRACE_DEFINE_ENUM(INMEM_DROP); 18 + TRACE_DEFINE_ENUM(INMEM_INVALIDATE); 19 + TRACE_DEFINE_ENUM(INMEM_REVOKE); 18 20 TRACE_DEFINE_ENUM(IPU); 19 21 TRACE_DEFINE_ENUM(OPU); 20 22 TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); ··· 44 42 TRACE_DEFINE_ENUM(CP_SYNC); 45 43 TRACE_DEFINE_ENUM(CP_RECOVERY); 46 44 TRACE_DEFINE_ENUM(CP_DISCARD); 45 + TRACE_DEFINE_ENUM(CP_TRIMMED); 47 46 48 47 #define show_block_type(type) \ 49 48 __print_symbolic(type, \ ··· 54 51 { META_FLUSH, "META_FLUSH" }, \ 55 52 { INMEM, "INMEM" }, \ 56 53 { INMEM_DROP, "INMEM_DROP" }, \ 54 + { INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \ 57 55 { INMEM_REVOKE, "INMEM_REVOKE" }, \ 58 56 { IPU, "IN-PLACE" }, \ 59 57 { OPU, "OUT-OF-PLACE" }) 60 58 61 - #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\ 62 - REQ_PRIO) 59 + #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ 60 + REQ_PREFLUSH | REQ_FUA) 63 61 #define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) 64 62 65 63 #define show_bio_type(op,op_flags) show_bio_op(op), \ ··· 79 75 { REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" }) 80 76 81 77 #define show_bio_op_flags(flags) \ 82 - __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ 83 - { REQ_RAHEAD, "(RA)" }, \ 84 - { REQ_SYNC, "(S)" }, \ 85 - { REQ_SYNC | REQ_PRIO, "(SP)" }, \ 86 - { REQ_META, "(M)" }, \ 87 - { REQ_META | REQ_PRIO, "(MP)" }, \ 88 - { REQ_SYNC | REQ_PREFLUSH , "(SF)" }, \ 89 - { REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" }, \ 90 - { REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" }, \ 91 - { 0, " \b" }) 78 + __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ 79 + { REQ_RAHEAD, "R" }, \ 80 + { REQ_SYNC, "S" }, \ 81 + { REQ_META, "M" }, \ 82 + { REQ_PRIO, "P" }, \ 83 + { REQ_PREFLUSH, "PF" }, \ 84 + { REQ_FUA, "FUA" }) 92 85 93 86 #define show_data_type(type) \ 94 87 __print_symbolic(type, \ ··· 118 117 { GC_CB, "Cost-Benefit" }) 119 118 120 119 #define show_cpreason(type) \ 121 - __print_symbolic(type, \ 120 + __print_flags(type, "|", \ 122 121 { CP_UMOUNT, "Umount" }, \ 123 122 { CP_FASTBOOT, "Fastboot" }, \ 124 123 { CP_SYNC, "Sync" }, \ 125 124 { CP_RECOVERY, "Recovery" }, \ 126 - { CP_DISCARD, "Discard" }) 125 + { CP_DISCARD, "Discard" }, \ 126 + { CP_UMOUNT, "Umount" }, \ 127 + { CP_TRIMMED, "Trimmed" }) 127 128 128 129 struct victim_sel_policy; 129 130 struct f2fs_map_blocks; ··· 772 769 ), 773 770 774 771 TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " 775 - "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s%s, type = %s", 772 + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s", 776 773 show_dev_ino(__entry), 777 774 (unsigned long)__entry->index, 778 775 (unsigned long long)__entry->old_blkaddr, ··· 825 822 __entry->size = bio->bi_iter.bi_size; 826 823 ), 827 824 828 - TP_printk("dev = (%d,%d)/(%d,%d), rw = %s%s, %s, sector = %lld, size = %u", 825 + TP_printk("dev = (%d,%d)/(%d,%d), rw = %s(%s), %s, sector = %lld, size = %u", 829 826 show_dev(__entry->target), 830 827 show_dev(__entry->dev), 831 828 show_bio_type(__entry->op, __entry->op_flags), ··· 1129 1126 __entry->msg) 1130 1127 ); 1131 1128 1132 - TRACE_EVENT(f2fs_issue_discard, 1129 + DECLARE_EVENT_CLASS(f2fs_discard, 1133 1130 1134 1131 TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), 1135 1132 ··· 1151 1148 show_dev(__entry->dev), 1152 1149 (unsigned long long)__entry->blkstart, 1153 1150 (unsigned long long)__entry->blklen) 1151 + ); 1152 + 1153 + DEFINE_EVENT(f2fs_discard, f2fs_queue_discard, 1154 + 1155 + TP_PROTO(struct 
block_device *dev, block_t blkstart, block_t blklen), 1156 + 1157 + TP_ARGS(dev, blkstart, blklen) 1158 + ); 1159 + 1160 + DEFINE_EVENT(f2fs_discard, f2fs_issue_discard, 1161 + 1162 + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), 1163 + 1164 + TP_ARGS(dev, blkstart, blklen) 1154 1165 ); 1155 1166 1156 1167 TRACE_EVENT(f2fs_issue_reset_zone, ··· 1191 1174 TRACE_EVENT(f2fs_issue_flush, 1192 1175 1193 1176 TP_PROTO(struct block_device *dev, unsigned int nobarrier, 1194 - unsigned int flush_merge), 1177 + unsigned int flush_merge, int ret), 1195 1178 1196 - TP_ARGS(dev, nobarrier, flush_merge), 1179 + TP_ARGS(dev, nobarrier, flush_merge, ret), 1197 1180 1198 1181 TP_STRUCT__entry( 1199 1182 __field(dev_t, dev) 1200 1183 __field(unsigned int, nobarrier) 1201 1184 __field(unsigned int, flush_merge) 1185 + __field(int, ret) 1202 1186 ), 1203 1187 1204 1188 TP_fast_assign( 1205 1189 __entry->dev = dev->bd_dev; 1206 1190 __entry->nobarrier = nobarrier; 1207 1191 __entry->flush_merge = flush_merge; 1192 + __entry->ret = ret; 1208 1193 ), 1209 1194 1210 - TP_printk("dev = (%d,%d), %s %s", 1195 + TP_printk("dev = (%d,%d), %s %s, ret = %d", 1211 1196 show_dev(__entry->dev), 1212 1197 __entry->nobarrier ? "skip (nobarrier)" : "issue", 1213 - __entry->flush_merge ? " with flush_merge" : "") 1198 + __entry->flush_merge ? " with flush_merge" : "", 1199 + __entry->ret) 1214 1200 ); 1215 1201 1216 1202 TRACE_EVENT(f2fs_lookup_extent_tree_start,
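
The tracepoint changes above replace __print_symbolic(), which needs an exact-match row for every flag combination, with __print_flags(), which decomposes a bitmask and appends any unmatched bits in hex. That decomposition is easy to mimic in plain C; the flag values below are illustrative, not the block layer's:

    #include <stdio.h>

    #define REQ_SYNC 0x1
    #define REQ_META 0x2
    #define REQ_PRIO 0x4   /* illustrative values */

    static void print_flags(unsigned int flags)
    {
        static const struct { unsigned int bit; const char *name; } tbl[] = {
            { REQ_SYNC, "S" }, { REQ_META, "M" }, { REQ_PRIO, "P" },
        };
        const char *sep = "";
        unsigned int i;

        for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
            if (flags & tbl[i].bit) {
                printf("%s%s", sep, tbl[i].name);
                sep = "|";
                flags &= ~tbl[i].bit;
            }
        }
        if (flags)   /* leftover bits, as __print_flags() prints in hex */
            printf("%s0x%x", sep, flags);
        printf("\n");
    }

    int main(void)
    {
        print_flags(REQ_SYNC | REQ_META | REQ_PRIO);   /* S|M|P */
        print_flags(REQ_SYNC | 0x80);                  /* S|0x80 */
        return 0;
    }
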