Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iomap: replace folio_batch allocation with stack allocation

Zhang Yi points out that the dynamic folio_batch allocation in
iomap_fill_dirty_folios() is problematic for the ext4 on iomap work
that is under development because it doesn't sufficiently handle the
allocation failure case (by allowing a retry, for example). We've
also seen lockdep (via syzbot) complain recently about the scope of
the allocation.

The dynamic allocation was initially added for simplicity and to
help indicate whether the batch was used or not by the calling fs.
To address these issues, put the batch on the stack of
iomap_zero_range() and use a flag to control whether the batch
should be used in the iomap folio lookup path. This keeps things
simple and eliminates allocation issues with lockdep and for ext4 on
iomap.

While here, also clean up the fill helper signature to be more
consistent with the underlying filemap helper. Pass through the
return value of the filemap helper (folio count) and update the
lookup offset via an out param.

Fixes: 395ed1ef0012 ("iomap: optional zero range dirty folio processing")
Signed-off-by: Brian Foster <bfoster@redhat.com>
Link: https://patch.msgid.link/20251208140548.373411-1-bfoster@redhat.com
Acked-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

Brian Foster and committed by
Christian Brauner
ed61378b a260bd22

+50 -25
+35 -15
fs/iomap/buffered-io.c
··· 832 832 if (!mapping_large_folio_support(iter->inode->i_mapping)) 833 833 len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); 834 834 835 - if (iter->fbatch) { 835 + if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) { 836 836 struct folio *folio = folio_batch_next(iter->fbatch); 837 837 838 838 if (!folio) ··· 929 929 * process so return and let the caller iterate and refill the batch. 930 930 */ 931 931 if (!folio) { 932 - WARN_ON_ONCE(!iter->fbatch); 932 + WARN_ON_ONCE(!(iter->iomap.flags & IOMAP_F_FOLIO_BATCH)); 933 933 return 0; 934 934 } 935 935 ··· 1544 1544 return status; 1545 1545 } 1546 1546 1547 - loff_t 1547 + /** 1548 + * iomap_fill_dirty_folios - fill a folio batch with dirty folios 1549 + * @iter: Iteration structure 1550 + * @start: Start offset of range. Updated based on lookup progress. 1551 + * @end: End offset of range 1552 + * @iomap_flags: Flags to set on the associated iomap to track the batch. 1553 + * 1554 + * Returns the folio count directly. Also returns the associated control flag if 1555 + * the batch lookup is performed and the expected offset of a subsequent 1556 + * lookup via out params. The caller is responsible to set the flag on the 1557 + * associated iomap. 
1558 + */ 1559 + unsigned int 1548 1560 iomap_fill_dirty_folios( 1549 1561 struct iomap_iter *iter, 1550 - loff_t offset, 1551 - loff_t length) 1562 + loff_t *start, 1563 + loff_t end, 1564 + unsigned int *iomap_flags) 1552 1565 { 1553 1566 struct address_space *mapping = iter->inode->i_mapping; 1554 - pgoff_t start = offset >> PAGE_SHIFT; 1555 - pgoff_t end = (offset + length - 1) >> PAGE_SHIFT; 1567 + pgoff_t pstart = *start >> PAGE_SHIFT; 1568 + pgoff_t pend = (end - 1) >> PAGE_SHIFT; 1569 + unsigned int count; 1556 1570 1557 - iter->fbatch = kmalloc(sizeof(struct folio_batch), GFP_KERNEL); 1558 - if (!iter->fbatch) 1559 - return offset + length; 1560 - folio_batch_init(iter->fbatch); 1571 + if (!iter->fbatch) { 1572 + *start = end; 1573 + return 0; 1574 + } 1561 1575 1562 - filemap_get_folios_dirty(mapping, &start, end, iter->fbatch); 1563 - return (start << PAGE_SHIFT); 1576 + count = filemap_get_folios_dirty(mapping, &pstart, pend, iter->fbatch); 1577 + *start = (pstart << PAGE_SHIFT); 1578 + *iomap_flags |= IOMAP_F_FOLIO_BATCH; 1579 + return count; 1564 1580 } 1565 1581 EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios); 1566 1582 ··· 1585 1569 const struct iomap_ops *ops, 1586 1570 const struct iomap_write_ops *write_ops, void *private) 1587 1571 { 1572 + struct folio_batch fbatch; 1588 1573 struct iomap_iter iter = { 1589 1574 .inode = inode, 1590 1575 .pos = pos, 1591 1576 .len = len, 1592 1577 .flags = IOMAP_ZERO, 1593 1578 .private = private, 1579 + .fbatch = &fbatch, 1594 1580 }; 1595 1581 struct address_space *mapping = inode->i_mapping; 1596 1582 int ret; 1597 1583 bool range_dirty; 1584 + 1585 + folio_batch_init(&fbatch); 1598 1586 1599 1587 /* 1600 1588 * To avoid an unconditional flush, check pagecache state and only flush ··· 1610 1590 while ((ret = iomap_iter(&iter, ops)) > 0) { 1611 1591 const struct iomap *srcmap = iomap_iter_srcmap(&iter); 1612 1592 1613 - if (WARN_ON_ONCE(iter.fbatch && 1593 + if (WARN_ON_ONCE((iter.iomap.flags & 
IOMAP_F_FOLIO_BATCH) && 1614 1594 srcmap->type != IOMAP_UNWRITTEN)) 1615 1595 return -EIO; 1616 1596 1617 - if (!iter.fbatch && 1597 + if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && 1618 1598 (srcmap->type == IOMAP_HOLE || 1619 1599 srcmap->type == IOMAP_UNWRITTEN)) { 1620 1600 s64 status;
+3 -3
fs/iomap/iter.c
··· 8 8 9 9 static inline void iomap_iter_reset_iomap(struct iomap_iter *iter) 10 10 { 11 - if (iter->fbatch) { 11 + if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) { 12 12 folio_batch_release(iter->fbatch); 13 - kfree(iter->fbatch); 14 - iter->fbatch = NULL; 13 + folio_batch_reinit(iter->fbatch); 14 + iter->iomap.flags &= ~IOMAP_F_FOLIO_BATCH; 15 15 } 16 16 17 17 iter->status = 0;
+6 -5
fs/xfs/xfs_iomap.c
··· 1831 1831 */ 1832 1832 if (flags & IOMAP_ZERO) { 1833 1833 xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); 1834 - u64 end; 1835 1834 1836 1835 if (isnullstartblock(imap.br_startblock) && 1837 1836 offset_fsb >= eof_fsb) ··· 1850 1851 */ 1851 1852 if (imap.br_state == XFS_EXT_UNWRITTEN && 1852 1853 offset_fsb < eof_fsb) { 1853 - loff_t len = min(count, 1854 - XFS_FSB_TO_B(mp, imap.br_blockcount)); 1854 + loff_t foffset = offset, fend; 1855 1855 1856 - end = iomap_fill_dirty_folios(iter, offset, len); 1856 + fend = offset + 1857 + min(count, XFS_FSB_TO_B(mp, imap.br_blockcount)); 1858 + iomap_fill_dirty_folios(iter, &foffset, fend, 1859 + &iomap_flags); 1857 1860 end_fsb = min_t(xfs_fileoff_t, end_fsb, 1858 - XFS_B_TO_FSB(mp, end)); 1861 + XFS_B_TO_FSB(mp, foffset)); 1859 1862 } 1860 1863 1861 1864 xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
+6 -2
include/linux/iomap.h
··· 88 88 /* 89 89 * Flags set by the core iomap code during operations: 90 90 * 91 + * IOMAP_F_FOLIO_BATCH indicates that the folio batch mechanism is active 92 + * for this operation, set by iomap_fill_dirty_folios(). 93 + * 91 94 * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size 92 95 * has changed as the result of this write operation. 93 96 * ··· 98 95 * range it covers needs to be remapped by the high level before the operation 99 96 * can proceed. 100 97 */ 98 + #define IOMAP_F_FOLIO_BATCH (1U << 13) 101 99 #define IOMAP_F_SIZE_CHANGED (1U << 14) 102 100 #define IOMAP_F_STALE (1U << 15) 103 101 ··· 356 352 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, 357 353 const struct iomap_ops *ops, 358 354 const struct iomap_write_ops *write_ops); 359 - loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset, 360 - loff_t length); 355 + unsigned int iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t *start, 356 + loff_t end, unsigned int *iomap_flags); 361 357 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, 362 358 bool *did_zero, const struct iomap_ops *ops, 363 359 const struct iomap_write_ops *write_ops, void *private);