xfs: make inode flush at ENOSPC synchronous

When we are writing to a single file and hit ENOSPC, we trigger a background
flush of the inode and try again. Because we hold page locks and the iolock,
the flush cannot proceed until we release those locks, which does not happen
until after we have given up and reported ENOSPC. Hence if this is the only
dirty inode in the system, ENOSPC is reported prematurely.

To fix this, remove the async flush from the allocation routines and move
it to the top of the write path, where we can do a synchronous flush and
then retry the write. Retry only once, as a second ENOSPC indicates that
we really are out of space.

This avoids the page cache deadlock, identified by Mikulas Patocka, that
occurs when the flush is done synchronously in the allocation layer.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
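
[Editor's note: the fragment below is a minimal userspace sketch of the
"flush, then retry exactly once" pattern described above, not XFS code.
syncfs(2) stands in roughly for xfs_flush_pages(), and the helper name
write_retry_once() is made up for illustration.]

/*
 * Sketch of the flush-and-retry-once pattern: on ENOSPC, force a
 * synchronous flush of the filesystem's dirty data, then retry the
 * write exactly once.  Illustrative only.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <unistd.h>

static ssize_t write_retry_once(int fd, const void *buf, size_t count)
{
	int enospc = 0;		/* have we already flushed and retried? */
	ssize_t ret;

retry:
	ret = write(fd, buf, count);
	if (ret < 0 && errno == ENOSPC && !enospc) {
		/*
		 * Flush dirty data so delayed allocations are converted
		 * and reserved space is released, then retry once.
		 */
		if (syncfs(fd) < 0)
			return -1;
		enospc = 1;
		goto retry;
	}
	return ret;	/* a second ENOSPC means we really are out of space */
}

[As in the patch, a second ENOSPC after the flush is passed back to the
caller; retrying indefinitely could spin forever on a genuinely full
filesystem.]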


 fs/xfs/linux-2.6/xfs_lrw.c  | 18 +++++++++++++++++-
 fs/xfs/linux-2.6/xfs_sync.c | 25 -------------------------
 fs/xfs/linux-2.6/xfs_sync.h |  1 -
 fs/xfs/xfs_iomap.c          |  2 +-
 4 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -751,10 +751,26 @@
 			goto relock;
 		}
 	} else {
+		int enospc = 0;
+		ssize_t ret2 = 0;
+
+write_retry:
 		xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
 				*offset, ioflags);
-		ret = generic_file_buffered_write(iocb, iovp, segs,
+		ret2 = generic_file_buffered_write(iocb, iovp, segs,
 				pos, offset, count, ret);
+		/*
+		 * if we just got an ENOSPC, flush the inode now we
+		 * aren't holding any page locks and retry *once*
+		 */
+		if (ret2 == -ENOSPC && !enospc) {
+			error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
+			if (error)
+				goto out_unlock_internal;
+			enospc = 1;
+			goto write_retry;
+		}
+		ret = ret2;
 	}
 
 	current->backing_dev_info = NULL;

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -426,31 +426,6 @@
  * heads, looking about for more room...
  */
 STATIC void
-xfs_flush_inode_work(
-	struct xfs_mount *mp,
-	void *arg)
-{
-	struct inode	*inode = arg;
-	filemap_flush(inode->i_mapping);
-	iput(inode);
-}
-
-void
-xfs_flush_inode(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-	delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
 xfs_flush_inodes_work(
 	struct xfs_mount *mp,
 	void *arg)

diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -44,7 +44,6 @@
 int xfs_quiesce_data(struct xfs_mount *mp);
 void xfs_quiesce_attr(struct xfs_mount *mp);
 
-void xfs_flush_inode(struct xfs_inode *ip);
 void xfs_flush_inodes(struct xfs_inode *ip);
 
 int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -347,7 +347,7 @@
 	case 0:
 		if (ip->i_delayed_blks) {
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			xfs_flush_inode(ip);
+			delay(1);
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			*fsynced = 1;
 		} else {