Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iomap: optimize pending async writeback accounting

Pending writebacks must be accounted for to determine when all requests
have completed and writeback on the folio should be ended. Currently
this is done by atomically incrementing ifs->write_bytes_pending for
every range to be written back.

Instead, the number of atomic operations can be minimized by setting
ifs->write_bytes_pending to the folio size, internally tracking how many
bytes are written back asynchronously, and then after sending off all
the requests, decrementing ifs->write_bytes_pending by the number of
bytes not written back asynchronously. Now, for N ranges written back,
only N + 2 atomic operations are required instead of 2N + 2.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://patch.msgid.link/20251111193658.3495942-5-joannelkoong@gmail.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

Authored by Joanne Koong; committed by Christian Brauner.
6b1fd228 7e6cea5a

+36 -30
+2 -2
fs/fuse/file.c
··· 1885 1885 * scope of the fi->lock alleviates xarray lock 1886 1886 * contention and noticeably improves performance. 1887 1887 */ 1888 - iomap_finish_folio_write(inode, ap->folios[i], 1); 1888 + iomap_finish_folio_write(inode, ap->folios[i], 1889 + ap->descs[i].length); 1889 1890 1890 1891 wake_up(&fi->page_waitq); 1891 1892 } ··· 2222 2221 ap = &wpa->ia.ap; 2223 2222 } 2224 2223 2225 - iomap_start_folio_write(inode, folio, 1); 2226 2224 fuse_writepage_args_page_fill(wpa, folio, ap->num_folios, 2227 2225 offset, len); 2228 2226 data->nr_bytes += len;
+34 -24
fs/iomap/buffered-io.c
··· 1641 1641 } 1642 1642 EXPORT_SYMBOL_GPL(iomap_page_mkwrite); 1643 1643 1644 - void iomap_start_folio_write(struct inode *inode, struct folio *folio, 1645 - size_t len) 1644 + static void iomap_writeback_init(struct inode *inode, struct folio *folio) 1646 1645 { 1647 1646 struct iomap_folio_state *ifs = folio->private; 1648 1647 1649 1648 WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); 1650 - if (ifs) 1651 - atomic_add(len, &ifs->write_bytes_pending); 1649 + if (ifs) { 1650 + WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0); 1651 + /* 1652 + * Set this to the folio size. After processing the folio for 1653 + * writeback in iomap_writeback_folio(), we'll subtract any 1654 + * ranges not written back. 1655 + * 1656 + * We do this because otherwise, we would have to atomically 1657 + * increment ifs->write_bytes_pending every time a range in the 1658 + * folio needs to be written back. 1659 + */ 1660 + atomic_set(&ifs->write_bytes_pending, folio_size(folio)); 1661 + } 1652 1662 } 1653 - EXPORT_SYMBOL_GPL(iomap_start_folio_write); 1654 1663 1655 1664 void iomap_finish_folio_write(struct inode *inode, struct folio *folio, 1656 1665 size_t len) ··· 1676 1667 1677 1668 static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, 1678 1669 struct folio *folio, u64 pos, u32 rlen, u64 end_pos, 1679 - bool *wb_pending) 1670 + size_t *bytes_submitted) 1680 1671 { 1681 1672 do { 1682 1673 ssize_t ret; ··· 1690 1681 pos += ret; 1691 1682 1692 1683 /* 1693 - * Holes are not be written back by ->writeback_range, so track 1684 + * Holes are not written back by ->writeback_range, so track 1694 1685 * if we did handle anything that is not a hole here. 
1695 1686 */ 1696 1687 if (wpc->iomap.type != IOMAP_HOLE) 1697 - *wb_pending = true; 1688 + *bytes_submitted += ret; 1698 1689 } while (rlen); 1699 1690 1700 1691 return 0; ··· 1765 1756 u64 pos = folio_pos(folio); 1766 1757 u64 end_pos = pos + folio_size(folio); 1767 1758 u64 end_aligned = 0; 1768 - bool wb_pending = false; 1759 + size_t bytes_submitted = 0; 1769 1760 int error = 0; 1770 1761 u32 rlen; 1771 1762 ··· 1785 1776 iomap_set_range_dirty(folio, 0, end_pos - pos); 1786 1777 } 1787 1778 1788 - /* 1789 - * Keep the I/O completion handler from clearing the writeback 1790 - * bit until we have submitted all blocks by adding a bias to 1791 - * ifs->write_bytes_pending, which is dropped after submitting 1792 - * all blocks. 1793 - */ 1794 - WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0); 1795 - iomap_start_folio_write(inode, folio, 1); 1779 + iomap_writeback_init(inode, folio); 1796 1780 } 1797 1781 1798 1782 /* ··· 1800 1798 end_aligned = round_up(end_pos, i_blocksize(inode)); 1801 1799 while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { 1802 1800 error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos, 1803 - &wb_pending); 1801 + &bytes_submitted); 1804 1802 if (error) 1805 1803 break; 1806 1804 pos += rlen; 1807 1805 } 1808 1806 1809 - if (wb_pending) 1807 + if (bytes_submitted) 1810 1808 wpc->nr_folios++; 1811 1809 1812 1810 /* ··· 1824 1822 * bit ourselves right after unlocking the page. 1825 1823 */ 1826 1824 if (ifs) { 1827 - if (atomic_dec_and_test(&ifs->write_bytes_pending)) 1828 - folio_end_writeback(folio); 1829 - } else { 1830 - if (!wb_pending) 1831 - folio_end_writeback(folio); 1825 + /* 1826 + * Subtract any bytes that were initially accounted to 1827 + * write_bytes_pending but skipped for writeback. 
1828 + */ 1829 + size_t bytes_not_submitted = folio_size(folio) - 1830 + bytes_submitted; 1831 + 1832 + if (bytes_not_submitted) 1833 + iomap_finish_folio_write(inode, folio, 1834 + bytes_not_submitted); 1835 + } else if (!bytes_submitted) { 1836 + folio_end_writeback(folio); 1832 1837 } 1838 + 1833 1839 mapping_set_error(inode->i_mapping, error); 1834 1840 return error; 1835 1841 }
-2
fs/iomap/ioend.c
··· 194 194 if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) 195 195 goto new_ioend; 196 196 197 - iomap_start_folio_write(wpc->inode, folio, map_len); 198 - 199 197 /* 200 198 * Clamp io_offset and io_size to the incore EOF so that ondisk 201 199 * file size updates in the ioend completion are byte-accurate.
-2
include/linux/iomap.h
··· 478 478 479 479 void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, 480 480 int error); 481 - void iomap_start_folio_write(struct inode *inode, struct folio *folio, 482 - size_t len); 483 481 void iomap_finish_folio_write(struct inode *inode, struct folio *folio, 484 482 size_t len); 485 483