Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: add fscache writeback support

When updating the backing store from the pagecache (à la writepage or
writepages), write to the cache first. This allows us to keep caching
files even when they are being written, as long as we have appropriate
caps.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20211129162907.149445-3-jlayton@kernel.org/ # v1
Link: https://lore.kernel.org/r/20211207134451.66296-3-jlayton@kernel.org/ # v2
Link: https://lore.kernel.org/r/163906985808.143852.1383891557313186623.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967190257.1823006.16713609520911954804.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021585020.640689.6765214932458435472.stgit@warthog.procyon.org.uk/ # v4

authored by

Jeff Layton and committed by
David Howells
1702e797 400e1286

+59 -8
+59 -8
fs/ceph/addr.c
··· 5 5 #include <linux/fs.h> 6 6 #include <linux/mm.h> 7 7 #include <linux/pagemap.h> 8 - #include <linux/writeback.h> /* generic_writepages */ 9 8 #include <linux/slab.h> 10 9 #include <linux/pagevec.h> 11 10 #include <linux/task_io_accounting_ops.h> ··· 383 384 netfs_readahead(ractl, &ceph_netfs_read_ops, (void *)(uintptr_t)got); 384 385 } 385 386 387 + #ifdef CONFIG_CEPH_FSCACHE 388 + static void ceph_set_page_fscache(struct page *page) 389 + { 390 + set_page_fscache(page); 391 + } 392 + 393 + static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) 394 + { 395 + struct inode *inode = priv; 396 + 397 + if (IS_ERR_VALUE(error) && error != -ENOBUFS) 398 + ceph_fscache_invalidate(inode, false); 399 + } 400 + 401 + static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) 402 + { 403 + struct ceph_inode_info *ci = ceph_inode(inode); 404 + struct fscache_cookie *cookie = ceph_fscache_cookie(ci); 405 + 406 + fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), 407 + ceph_fscache_write_terminated, inode, caching); 408 + } 409 + #else 410 + static inline void ceph_set_page_fscache(struct page *page) 411 + { 412 + } 413 + 414 + static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) 415 + { 416 + } 417 + #endif /* CONFIG_CEPH_FSCACHE */ 418 + 386 419 struct ceph_writeback_ctl 387 420 { 388 421 loff_t i_size; ··· 530 499 struct ceph_writeback_ctl ceph_wbc; 531 500 struct ceph_osd_client *osdc = &fsc->client->osdc; 532 501 struct ceph_osd_request *req; 502 + bool caching = ceph_is_cache_enabled(inode); 533 503 534 504 dout("writepage %p idx %lu\n", page, page->index); 535 505 ··· 569 537 CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) 570 538 set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); 571 539 572 - set_page_writeback(page); 573 540 req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1, 
574 541 CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, 575 542 ceph_wbc.truncate_seq, ceph_wbc.truncate_size, 576 543 true); 577 - if (IS_ERR(req)) { 578 - redirty_page_for_writepage(wbc, page); 579 - end_page_writeback(page); 544 + if (IS_ERR(req)) 580 545 return PTR_ERR(req); 581 - } 546 + 547 + set_page_writeback(page); 548 + if (caching) 549 + ceph_set_page_fscache(page); 550 + ceph_fscache_write_to_cache(inode, page_off, len, caching); 582 551 583 552 /* it may be a short write due to an object boundary */ 584 553 WARN_ON_ONCE(len > thp_size(page)); ··· 638 605 struct inode *inode = page->mapping->host; 639 606 BUG_ON(!inode); 640 607 ihold(inode); 608 + 609 + wait_on_page_fscache(page); 610 + 641 611 err = writepage_nounlock(page, wbc); 642 612 if (err == -ERESTARTSYS) { 643 613 /* direct memory reclaimer was killed by SIGKILL. return 0 ··· 762 726 struct ceph_writeback_ctl ceph_wbc; 763 727 bool should_loop, range_whole = false; 764 728 bool done = false; 729 + bool caching = ceph_is_cache_enabled(inode); 765 730 766 731 dout("writepages_start %p (mode=%s)\n", inode, 767 732 wbc->sync_mode == WB_SYNC_NONE ? "NONE" : ··· 886 849 unlock_page(page); 887 850 break; 888 851 } 889 - if (PageWriteback(page)) { 852 + if (PageWriteback(page) || PageFsCache(page)) { 890 853 if (wbc->sync_mode == WB_SYNC_NONE) { 891 854 dout("%p under writeback\n", page); 892 855 unlock_page(page); ··· 894 857 } 895 858 dout("waiting on writeback %p\n", page); 896 859 wait_on_page_writeback(page); 860 + wait_on_page_fscache(page); 897 861 } 898 862 899 863 if (!clear_page_dirty_for_io(page)) { ··· 1027 989 op_idx = 0; 1028 990 for (i = 0; i < locked_pages; i++) { 1029 991 u64 cur_offset = page_offset(pages[i]); 992 + /* 993 + * Discontinuity in page range? Ceph can handle that by just passing 994 + * multiple extents in the write op. 
995 + */ 1030 996 if (offset + len != cur_offset) { 997 + /* If it's full, stop here */ 1031 998 if (op_idx + 1 == req->r_num_ops) 1032 999 break; 1000 + 1001 + /* Kick off an fscache write with what we have so far. */ 1002 + ceph_fscache_write_to_cache(inode, offset, len, caching); 1003 + 1004 + /* Start a new extent */ 1033 1005 osd_req_op_extent_dup_last(req, op_idx, 1034 1006 cur_offset - offset); 1035 1007 dout("writepages got pages at %llu~%llu\n", ··· 1050 1002 osd_req_op_extent_update(req, op_idx, len); 1051 1003 1052 1004 len = 0; 1053 - offset = cur_offset; 1005 + offset = cur_offset; 1054 1006 data_pages = pages + i; 1055 1007 op_idx++; 1056 1008 } 1057 1009 1058 1010 set_page_writeback(pages[i]); 1011 + if (caching) 1012 + ceph_set_page_fscache(pages[i]); 1059 1013 len += thp_size(page); 1060 1014 } 1015 + ceph_fscache_write_to_cache(inode, offset, len, caching); 1061 1016 1062 1017 if (ceph_wbc.size_stable) { 1063 1018 len = min(len, ceph_wbc.i_size - offset);