Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: enhance multiple device flush

When the multiple-device feature is enabled, during ->fsync we issue a
flush to every device to make sure the node/data of the file is persisted
to storage. But some of those device flushes can be unnecessary, since the
file's data may not have been written back to all of the devices. So this
patch adds and manages a per-inode bitmap in a global cache to track which
devices are dirty and therefore need a flush during ->fsync; hence, we can
improve fsync performance in multiple-device scenarios.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

authored by

Chao Yu and committed by
Jaegeuk Kim
39d787be b77061bf

+86 -21
+31 -5
fs/f2fs/checkpoint.c
··· 401 401 #endif 402 402 }; 403 403 404 - static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 404 + static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, 405 + unsigned int devidx, int type) 405 406 { 406 407 struct inode_management *im = &sbi->im[type]; 407 408 struct ino_entry *e, *tmp; ··· 427 426 if (type != ORPHAN_INO) 428 427 im->ino_num++; 429 428 } 429 + 430 + if (type == FLUSH_INO) 431 + f2fs_set_bit(devidx, (char *)&e->dirty_device); 432 + 430 433 spin_unlock(&im->ino_lock); 431 434 radix_tree_preload_end(); 432 435 ··· 459 454 void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 460 455 { 461 456 /* add new dirty ino entry into list */ 462 - __add_ino_entry(sbi, ino, type); 457 + __add_ino_entry(sbi, ino, 0, type); 463 458 } 464 459 465 460 void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) ··· 485 480 struct ino_entry *e, *tmp; 486 481 int i; 487 482 488 - for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { 483 + for (i = all ? 
ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) { 489 484 struct inode_management *im = &sbi->im[i]; 490 485 491 486 spin_lock(&im->ino_lock); ··· 497 492 } 498 493 spin_unlock(&im->ino_lock); 499 494 } 495 + } 496 + 497 + void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, 498 + unsigned int devidx, int type) 499 + { 500 + __add_ino_entry(sbi, ino, devidx, type); 501 + } 502 + 503 + bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, 504 + unsigned int devidx, int type) 505 + { 506 + struct inode_management *im = &sbi->im[type]; 507 + struct ino_entry *e; 508 + bool is_dirty = false; 509 + 510 + spin_lock(&im->ino_lock); 511 + e = radix_tree_lookup(&im->ino_root, ino); 512 + if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device)) 513 + is_dirty = true; 514 + spin_unlock(&im->ino_lock); 515 + return is_dirty; 500 516 } 501 517 502 518 int acquire_orphan_inode(struct f2fs_sb_info *sbi) ··· 556 530 void add_orphan_inode(struct inode *inode) 557 531 { 558 532 /* add new orphan ino entry into list */ 559 - __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); 533 + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO); 560 534 update_inode_page(inode); 561 535 } 562 536 ··· 580 554 return err; 581 555 } 582 556 583 - __add_ino_entry(sbi, ino, ORPHAN_INO); 557 + __add_ino_entry(sbi, ino, 0, ORPHAN_INO); 584 558 585 559 inode = f2fs_iget_retry(sbi->sb, ino); 586 560 if (IS_ERR(inode)) {
+1
fs/f2fs/data.c
··· 1498 1498 int err = 0; 1499 1499 struct f2fs_io_info fio = { 1500 1500 .sbi = sbi, 1501 + .ino = inode->i_ino, 1501 1502 .type = DATA, 1502 1503 .op = REQ_OP_WRITE, 1503 1504 .op_flags = wbc_to_write_flags(wbc),
+11 -3
fs/f2fs/f2fs.h
··· 177 177 ORPHAN_INO, /* for orphan ino list */ 178 178 APPEND_INO, /* for append ino list */ 179 179 UPDATE_INO, /* for update ino list */ 180 + FLUSH_INO, /* for multiple device flushing */ 180 181 MAX_INO_ENTRY, /* max. list */ 181 182 }; 182 183 183 184 struct ino_entry { 184 - struct list_head list; /* list head */ 185 - nid_t ino; /* inode number */ 185 + struct list_head list; /* list head */ 186 + nid_t ino; /* inode number */ 187 + unsigned int dirty_device; /* dirty device bitmap */ 186 188 }; 187 189 188 190 /* for the list of inodes to be GCed */ ··· 776 774 struct flush_cmd { 777 775 struct completion wait; 778 776 struct llist_node llnode; 777 + nid_t ino; 779 778 int ret; 780 779 }; 781 780 ··· 904 901 905 902 struct f2fs_io_info { 906 903 struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ 904 + nid_t ino; /* inode number */ 907 905 enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ 908 906 enum temp_type temp; /* contains HOT/WARM/COLD */ 909 907 int op; /* contains REQ_OP_ */ ··· 2525 2521 int commit_inmem_pages(struct inode *inode); 2526 2522 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); 2527 2523 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); 2528 - int f2fs_issue_flush(struct f2fs_sb_info *sbi); 2524 + int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); 2529 2525 int create_flush_cmd_control(struct f2fs_sb_info *sbi); 2530 2526 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); 2531 2527 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); ··· 2587 2583 void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); 2588 2584 void release_ino_entry(struct f2fs_sb_info *sbi, bool all); 2589 2585 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); 2586 + void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, 2587 + unsigned int devidx, int type); 2588 + bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, 2589 + unsigned int devidx, int type); 
2590 2590 int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); 2591 2591 int acquire_orphan_inode(struct f2fs_sb_info *sbi); 2592 2592 void release_orphan_inode(struct f2fs_sb_info *sbi);
+2 -1
fs/f2fs/file.c
··· 295 295 clear_inode_flag(inode, FI_APPEND_WRITE); 296 296 flush_out: 297 297 if (!atomic) 298 - ret = f2fs_issue_flush(sbi); 298 + ret = f2fs_issue_flush(sbi, inode->i_ino); 299 299 if (!ret) { 300 300 remove_ino_entry(sbi, ino, UPDATE_INO); 301 301 clear_inode_flag(inode, FI_UPDATE_WRITE); 302 + remove_ino_entry(sbi, ino, FLUSH_INO); 302 303 } 303 304 f2fs_update_time(sbi, REQ_TIME); 304 305 out:
+2
fs/f2fs/gc.c
··· 598 598 { 599 599 struct f2fs_io_info fio = { 600 600 .sbi = F2FS_I_SB(inode), 601 + .ino = inode->i_ino, 601 602 .type = DATA, 602 603 .temp = COLD, 603 604 .op = REQ_OP_READ, ··· 729 728 } else { 730 729 struct f2fs_io_info fio = { 731 730 .sbi = F2FS_I_SB(inode), 731 + .ino = inode->i_ino, 732 732 .type = DATA, 733 733 .temp = COLD, 734 734 .op = REQ_OP_WRITE,
+1
fs/f2fs/inline.c
··· 112 112 { 113 113 struct f2fs_io_info fio = { 114 114 .sbi = F2FS_I_SB(dn->inode), 115 + .ino = dn->inode->i_ino, 115 116 .type = DATA, 116 117 .op = REQ_OP_WRITE, 117 118 .op_flags = REQ_SYNC | REQ_PRIO,
+1
fs/f2fs/inode.c
··· 480 480 481 481 remove_ino_entry(sbi, inode->i_ino, APPEND_INO); 482 482 remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); 483 + remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); 483 484 484 485 sb_start_intwrite(inode->i_sb); 485 486 set_inode_flag(inode, FI_NO_ALLOC);
+2 -1
fs/f2fs/node.c
··· 63 63 } else if (type == INO_ENTRIES) { 64 64 int i; 65 65 66 - for (i = 0; i <= UPDATE_INO; i++) 66 + for (i = 0; i < MAX_INO_ENTRY; i++) 67 67 mem_size += sbi->im[i].ino_num * 68 68 sizeof(struct ino_entry); 69 69 mem_size >>= PAGE_SHIFT; ··· 1340 1340 struct node_info ni; 1341 1341 struct f2fs_io_info fio = { 1342 1342 .sbi = sbi, 1343 + .ino = ino_of_node(page), 1343 1344 .type = NODE, 1344 1345 .op = REQ_OP_WRITE, 1345 1346 .op_flags = wbc_to_write_flags(wbc),
+35 -11
fs/f2fs/segment.c
··· 313 313 struct inmem_pages *cur, *tmp; 314 314 struct f2fs_io_info fio = { 315 315 .sbi = sbi, 316 + .ino = inode->i_ino, 316 317 .type = DATA, 317 318 .op = REQ_OP_WRITE, 318 319 .op_flags = REQ_SYNC | REQ_PRIO, ··· 486 485 return ret; 487 486 } 488 487 489 - static int submit_flush_wait(struct f2fs_sb_info *sbi) 488 + static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) 490 489 { 491 - int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); 490 + int ret = 0; 492 491 int i; 493 492 494 - if (!sbi->s_ndevs || ret) 495 - return ret; 493 + if (!sbi->s_ndevs) 494 + return __submit_flush_wait(sbi, sbi->sb->s_bdev); 496 495 497 - for (i = 1; i < sbi->s_ndevs; i++) { 496 + for (i = 0; i < sbi->s_ndevs; i++) { 497 + if (!is_dirty_device(sbi, ino, i, FLUSH_INO)) 498 + continue; 498 499 ret = __submit_flush_wait(sbi, FDEV(i).bdev); 499 500 if (ret) 500 501 break; ··· 522 519 fcc->dispatch_list = llist_del_all(&fcc->issue_list); 523 520 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); 524 521 525 - ret = submit_flush_wait(sbi); 522 + cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode); 523 + 524 + ret = submit_flush_wait(sbi, cmd->ino); 526 525 atomic_inc(&fcc->issued_flush); 527 526 528 527 llist_for_each_entry_safe(cmd, next, ··· 542 537 goto repeat; 543 538 } 544 539 545 - int f2fs_issue_flush(struct f2fs_sb_info *sbi) 540 + int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) 546 541 { 547 542 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; 548 543 struct flush_cmd cmd; ··· 552 547 return 0; 553 548 554 549 if (!test_opt(sbi, FLUSH_MERGE)) { 555 - ret = submit_flush_wait(sbi); 550 + ret = submit_flush_wait(sbi, ino); 556 551 atomic_inc(&fcc->issued_flush); 557 552 return ret; 558 553 } 559 554 560 - if (atomic_inc_return(&fcc->issing_flush) == 1) { 561 - ret = submit_flush_wait(sbi); 555 + if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { 556 + ret = submit_flush_wait(sbi, ino); 562 557 
atomic_dec(&fcc->issing_flush); 563 558 564 559 atomic_inc(&fcc->issued_flush); 565 560 return ret; 566 561 } 567 562 563 + cmd.ino = ino; 568 564 init_completion(&cmd.wait); 569 565 570 566 llist_add(&cmd.llnode, &fcc->issue_list); ··· 589 583 } else { 590 584 struct flush_cmd *tmp, *next; 591 585 592 - ret = submit_flush_wait(sbi); 586 + ret = submit_flush_wait(sbi, ino); 593 587 594 588 llist_for_each_entry_safe(tmp, next, list, llnode) { 595 589 if (tmp == &cmd) { ··· 2390 2384 mutex_unlock(&curseg->curseg_mutex); 2391 2385 } 2392 2386 2387 + static void update_device_state(struct f2fs_io_info *fio) 2388 + { 2389 + struct f2fs_sb_info *sbi = fio->sbi; 2390 + unsigned int devidx; 2391 + 2392 + if (!sbi->s_ndevs) 2393 + return; 2394 + 2395 + devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); 2396 + 2397 + /* update device state for fsync */ 2398 + set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); 2399 + } 2400 + 2393 2401 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 2394 2402 { 2395 2403 int type = __get_segment_type(fio); ··· 2418 2398 if (err == -EAGAIN) { 2419 2399 fio->old_blkaddr = fio->new_blkaddr; 2420 2400 goto reallocate; 2401 + } else if (!err) { 2402 + update_device_state(fio); 2421 2403 } 2422 2404 } 2423 2405 ··· 2480 2458 stat_inc_inplace_blocks(fio->sbi); 2481 2459 2482 2460 err = f2fs_submit_page_bio(fio); 2461 + if (!err) 2462 + update_device_state(fio); 2483 2463 2484 2464 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); 2485 2465