Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: add async reset zone command support

This patch enables submitting the reset zone command asynchronously. It helps
decrease the average latency of write IOs in high-utilization scenarios by
enabling faster checkpointing.

Signed-off-by: Daejun Park <daejun7.park@samsung.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

authored by

Daejun Park and committed by
Jaegeuk Kim
25f90805 901c12d1

+104 -6
+1
fs/f2fs/f2fs.h
··· 1176 1176 /* other */ 1177 1177 FS_DISCARD_IO, /* discard */ 1178 1178 FS_FLUSH_IO, /* flush */ 1179 + FS_ZONE_RESET_IO, /* zone reset */ 1179 1180 NR_IO_TYPE, 1180 1181 }; 1181 1182
+1
fs/f2fs/iostat.c
··· 80 80 seq_puts(seq, "[OTHER]\n"); 81 81 IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO); 82 82 IOSTAT_INFO_SHOW("fs flush", FS_FLUSH_IO); 83 + IOSTAT_INFO_SHOW("fs zone reset", FS_ZONE_RESET_IO); 83 84 84 85 return 0; 85 86 }
+81 -3
fs/f2fs/segment.c
··· 1196 1196 static void __update_discard_tree_range(struct f2fs_sb_info *sbi, 1197 1197 struct block_device *bdev, block_t lstart, 1198 1198 block_t start, block_t len); 1199 + 1200 + #ifdef CONFIG_BLK_DEV_ZONED 1201 + static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi, 1202 + struct discard_cmd *dc, blk_opf_t flag, 1203 + struct list_head *wait_list, 1204 + unsigned int *issued) 1205 + { 1206 + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1207 + struct block_device *bdev = dc->bdev; 1208 + struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS); 1209 + unsigned long flags; 1210 + 1211 + trace_f2fs_issue_reset_zone(bdev, dc->di.start); 1212 + 1213 + spin_lock_irqsave(&dc->lock, flags); 1214 + dc->state = D_SUBMIT; 1215 + dc->bio_ref++; 1216 + spin_unlock_irqrestore(&dc->lock, flags); 1217 + 1218 + if (issued) 1219 + (*issued)++; 1220 + 1221 + atomic_inc(&dcc->queued_discard); 1222 + dc->queued++; 1223 + list_move_tail(&dc->list, wait_list); 1224 + 1225 + /* sanity check on discard range */ 1226 + __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len); 1227 + 1228 + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start); 1229 + bio->bi_private = dc; 1230 + bio->bi_end_io = f2fs_submit_discard_endio; 1231 + submit_bio(bio); 1232 + 1233 + atomic_inc(&dcc->issued_discard); 1234 + f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE); 1235 + } 1236 + #endif 1237 + 1199 1238 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 1200 1239 static int __submit_discard_cmd(struct f2fs_sb_info *sbi, 1201 1240 struct discard_policy *dpolicy, ··· 1255 1216 1256 1217 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) 1257 1218 return 0; 1219 + 1220 + #ifdef CONFIG_BLK_DEV_ZONED 1221 + if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) { 1222 + __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued); 1223 + return 0; 1224 + } 1225 + #endif 1258 1226 1259 1227 trace_f2fs_issue_discard(bdev, 
dc->di.start, dc->di.len); 1260 1228 ··· 1506 1460 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); 1507 1461 } 1508 1462 } 1463 + 1464 + #ifdef CONFIG_BLK_DEV_ZONED 1465 + static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi, 1466 + struct block_device *bdev, block_t blkstart, block_t lblkstart, 1467 + block_t blklen) 1468 + { 1469 + trace_f2fs_queue_reset_zone(bdev, blkstart); 1470 + 1471 + mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); 1472 + __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); 1473 + mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); 1474 + } 1475 + #endif 1509 1476 1510 1477 static void __queue_discard_cmd(struct f2fs_sb_info *sbi, 1511 1478 struct block_device *bdev, block_t blkstart, block_t blklen) ··· 1783 1724 1784 1725 mutex_lock(&dcc->cmd_lock); 1785 1726 dc = __lookup_discard_cmd(sbi, blkaddr); 1727 + #ifdef CONFIG_BLK_DEV_ZONED 1728 + if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) { 1729 + /* force submit zone reset */ 1730 + if (dc->state == D_PREP) 1731 + __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, 1732 + &dcc->wait_list, NULL); 1733 + dc->ref++; 1734 + mutex_unlock(&dcc->cmd_lock); 1735 + /* wait zone reset */ 1736 + __wait_one_discard_bio(sbi, dc); 1737 + return; 1738 + } 1739 + #endif 1786 1740 if (dc) { 1787 1741 if (dc->state == D_PREP) { 1788 1742 __punch_discard_cmd(sbi, dc, blkaddr); ··· 1948 1876 blkstart, blklen); 1949 1877 return -EIO; 1950 1878 } 1951 - trace_f2fs_issue_reset_zone(bdev, blkstart); 1952 - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, 1953 - sector, nr_sects, GFP_NOFS); 1879 + 1880 + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) { 1881 + trace_f2fs_issue_reset_zone(bdev, blkstart); 1882 + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, 1883 + sector, nr_sects, GFP_NOFS); 1884 + } 1885 + 1886 + __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen); 1887 + return 0; 1954 1888 } 1955 1889 1956 1890 /* For conventional zones, use regular discard if supported 
*/
+21 -3
include/trace/events/f2fs.h
··· 1512 1512 TP_ARGS(dev, blkstart, blklen) 1513 1513 ); 1514 1514 1515 - TRACE_EVENT(f2fs_issue_reset_zone, 1515 + DECLARE_EVENT_CLASS(f2fs_reset_zone, 1516 1516 1517 1517 TP_PROTO(struct block_device *dev, block_t blkstart), 1518 1518 ··· 1528 1528 __entry->blkstart = blkstart; 1529 1529 ), 1530 1530 1531 - TP_printk("dev = (%d,%d), reset zone at block = 0x%llx", 1531 + TP_printk("dev = (%d,%d), zone at block = 0x%llx", 1532 1532 show_dev(__entry->dev), 1533 1533 (unsigned long long)__entry->blkstart) 1534 + ); 1535 + 1536 + DEFINE_EVENT(f2fs_reset_zone, f2fs_queue_reset_zone, 1537 + 1538 + TP_PROTO(struct block_device *dev, block_t blkstart), 1539 + 1540 + TP_ARGS(dev, blkstart) 1541 + ); 1542 + 1543 + DEFINE_EVENT(f2fs_reset_zone, f2fs_issue_reset_zone, 1544 + 1545 + TP_PROTO(struct block_device *dev, block_t blkstart), 1546 + 1547 + TP_ARGS(dev, blkstart) 1534 1548 ); 1535 1549 1536 1550 TRACE_EVENT(f2fs_issue_flush, ··· 1993 1979 __field(unsigned long long, fs_nrio) 1994 1980 __field(unsigned long long, fs_mrio) 1995 1981 __field(unsigned long long, fs_discard) 1982 + __field(unsigned long long, fs_reset_zone) 1996 1983 ), 1997 1984 1998 1985 TP_fast_assign( ··· 2025 2010 __entry->fs_nrio = iostat[FS_NODE_READ_IO]; 2026 2011 __entry->fs_mrio = iostat[FS_META_READ_IO]; 2027 2012 __entry->fs_discard = iostat[FS_DISCARD_IO]; 2013 + __entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO]; 2028 2014 ), 2029 2015 2030 2016 TP_printk("dev = (%d,%d), " 2031 2017 "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu, " 2032 2018 "compr(buffered=%llu, mapped=%llu)], " 2033 - "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu], " 2019 + "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu, " 2020 + "reset_zone=%llu], " 2034 2021 "gc [data=%llu, node=%llu], " 2035 2022 "cp [data=%llu, node=%llu, meta=%llu], " 2036 2023 "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " ··· 2043 2026 __entry->app_bio, __entry->app_mio, __entry->app_bcdio, 
2044 2027 __entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio, 2045 2028 __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, 2029 + __entry->fs_reset_zone, 2046 2030 __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, 2047 2031 __entry->fs_cp_nio, __entry->fs_cp_mio, 2048 2032 __entry->app_rio, __entry->app_drio, __entry->app_brio,