Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: implement writeback livelock avoidance using page tagging

If we try to flush a file's buffers while mapped IOs continue, the
fsync() won't complete until those IOs finish.

This is analogous to Jan Kara's commit f446daaea9d4 ("mm: implement
writeback livelock avoidance using page tagging"): we will try to
avoid writeback livelocks when something steadily creates dirty pages
in a mapping we are writing out.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

authored by

Xiubo Li and committed by
Ilya Dryomov
7d41870d 7a6c3a03

+10 -1
+10 -1
fs/ceph/addr.c
··· 808 808 bool should_loop, range_whole = false; 809 809 bool done = false; 810 810 bool caching = ceph_is_cache_enabled(inode); 811 + xa_mark_t tag; 811 812 812 813 if (wbc->sync_mode == WB_SYNC_NONE && 813 814 fsc->write_congested) ··· 835 834 start_index = wbc->range_cyclic ? mapping->writeback_index : 0; 836 835 index = start_index; 837 836 837 + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) { 838 + tag = PAGECACHE_TAG_TOWRITE; 839 + } else { 840 + tag = PAGECACHE_TAG_DIRTY; 841 + } 838 842 retry: 839 843 /* find oldest snap context with dirty data */ 840 844 snapc = get_oldest_context(inode, &ceph_wbc, NULL); ··· 878 872 dout(" non-head snapc, range whole\n"); 879 873 } 880 874 875 + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 876 + tag_pages_for_writeback(mapping, index, end); 877 + 881 878 ceph_put_snap_context(last_snapc); 882 879 last_snapc = snapc; 883 880 ··· 897 888 898 889 get_more_pages: 899 890 nr_folios = filemap_get_folios_tag(mapping, &index, 900 - end, PAGECACHE_TAG_DIRTY, &fbatch); 891 + end, tag, &fbatch); 901 892 dout("pagevec_lookup_range_tag got %d\n", nr_folios); 902 893 if (!nr_folios && !locked_pages) 903 894 break;