Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iomap: Add done_before argument to iomap_dio_rw

Add a done_before argument to iomap_dio_rw that indicates how much of
the request has already been transferred. When the request succeeds, we
report that done_before additional bytes were transferred.  This is
useful for finishing a request asynchronously when part of the request
has already been completed synchronously.

We'll use that to allow iomap_dio_rw to be used with page faults
disabled: when a page fault occurs while submitting a request, we
synchronously complete the part of the request that has already been
submitted. The caller can then take care of the page fault and call
iomap_dio_rw again for the rest of the request, passing in the number of
bytes already transferred.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

+32 -17
+3 -2
fs/btrfs/file.c
··· 1957 1957 } 1958 1958 1959 1959 dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 1960 - 0); 1960 + 0, 0); 1961 1961 1962 1962 btrfs_inode_unlock(inode, ilock_flags); 1963 1963 ··· 3658 3658 return 0; 3659 3659 3660 3660 btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); 3661 - ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 0); 3661 + ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 3662 + 0, 0); 3662 3663 btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); 3663 3664 return ret; 3664 3665 }
+1 -1
fs/erofs/data.c
··· 287 287 288 288 if (!err) 289 289 return iomap_dio_rw(iocb, to, &erofs_iomap_ops, 290 - NULL, 0); 290 + NULL, 0, 0); 291 291 if (err < 0) 292 292 return err; 293 293 }
+3 -2
fs/ext4/file.c
··· 74 74 return generic_file_read_iter(iocb, to); 75 75 } 76 76 77 - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0); 77 + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0); 78 78 inode_unlock_shared(inode); 79 79 80 80 file_accessed(iocb->ki_filp); ··· 566 566 if (ilock_shared) 567 567 iomap_ops = &ext4_iomap_overwrite_ops; 568 568 ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, 569 - (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0); 569 + (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0, 570 + 0); 570 571 if (ret == -ENOTBLK) 571 572 ret = 0; 572 573
+2 -2
fs/gfs2/file.c
··· 822 822 if (ret) 823 823 goto out_uninit; 824 824 825 - ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0); 825 + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0); 826 826 gfs2_glock_dq(gh); 827 827 out_uninit: 828 828 gfs2_holder_uninit(gh); ··· 856 856 if (offset + len > i_size_read(&ip->i_inode)) 857 857 goto out; 858 858 859 - ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0); 859 + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0); 860 860 if (ret == -ENOTBLK) 861 861 ret = 0; 862 862 out:
+16 -3
fs/iomap/direct-io.c
··· 31 31 atomic_t ref; 32 32 unsigned flags; 33 33 int error; 34 + size_t done_before; 34 35 bool wait_for_completion; 35 36 36 37 union { ··· 124 123 */ 125 124 if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC)) 126 125 ret = generic_write_sync(iocb, ret); 126 + 127 + if (ret > 0) 128 + ret += dio->done_before; 127 129 128 130 kfree(dio); 129 131 ··· 454 450 * may be pure data writes. In that case, we still need to do a full data sync 455 451 * completion. 456 452 * 453 + * When page faults are disabled and @dio_flags includes IOMAP_DIO_PARTIAL, 454 + * __iomap_dio_rw can return a partial result if it encounters a non-resident 455 + * page in @iter after preparing a transfer. In that case, the non-resident 456 + * pages can be faulted in and the request resumed with @done_before set to the 457 + * number of bytes previously transferred. The request will then complete with 458 + * the correct total number of bytes transferred; this is essential for 459 + * completing partial requests asynchronously. 460 + * 457 461 * Returns -ENOTBLK In case of a page invalidation invalidation failure for 458 462 * writes. The callers needs to fall back to buffered I/O in this case. 
459 463 */ 460 464 struct iomap_dio * 461 465 __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, 462 466 const struct iomap_ops *ops, const struct iomap_dio_ops *dops, 463 - unsigned int dio_flags) 467 + unsigned int dio_flags, size_t done_before) 464 468 { 465 469 struct address_space *mapping = iocb->ki_filp->f_mapping; 466 470 struct inode *inode = file_inode(iocb->ki_filp); ··· 498 486 dio->dops = dops; 499 487 dio->error = 0; 500 488 dio->flags = 0; 489 + dio->done_before = done_before; 501 490 502 491 dio->submit.iter = iter; 503 492 dio->submit.waiter = current; ··· 665 652 ssize_t 666 653 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, 667 654 const struct iomap_ops *ops, const struct iomap_dio_ops *dops, 668 - unsigned int dio_flags) 655 + unsigned int dio_flags, size_t done_before) 669 656 { 670 657 struct iomap_dio *dio; 671 658 672 - dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags); 659 + dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before); 673 660 if (IS_ERR_OR_NULL(dio)) 674 661 return PTR_ERR_OR_ZERO(dio); 675 662 return iomap_dio_complete(dio);
+3 -3
fs/xfs/xfs_file.c
··· 259 259 ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); 260 260 if (ret) 261 261 return ret; 262 - ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0); 262 + ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0); 263 263 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 264 264 265 265 return ret; ··· 569 569 } 570 570 trace_xfs_file_direct_write(iocb, from); 571 571 ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, 572 - &xfs_dio_write_ops, 0); 572 + &xfs_dio_write_ops, 0, 0); 573 573 out_unlock: 574 574 if (iolock) 575 575 xfs_iunlock(ip, iolock); ··· 647 647 648 648 trace_xfs_file_direct_write(iocb, from); 649 649 ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, 650 - &xfs_dio_write_ops, flags); 650 + &xfs_dio_write_ops, flags, 0); 651 651 652 652 /* 653 653 * Retry unaligned I/O with exclusive blocking semantics if the DIO
+2 -2
fs/zonefs/super.c
··· 852 852 ret = zonefs_file_dio_append(iocb, from); 853 853 else 854 854 ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, 855 - &zonefs_write_dio_ops, 0); 855 + &zonefs_write_dio_ops, 0, 0); 856 856 if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && 857 857 (ret > 0 || ret == -EIOCBQUEUED)) { 858 858 if (ret > 0) ··· 987 987 } 988 988 file_accessed(iocb->ki_filp); 989 989 ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, 990 - &zonefs_read_dio_ops, 0); 990 + &zonefs_read_dio_ops, 0, 0); 991 991 } else { 992 992 ret = generic_file_read_iter(iocb, to); 993 993 if (ret == -EIO)
+2 -2
include/linux/iomap.h
··· 339 339 340 340 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, 341 341 const struct iomap_ops *ops, const struct iomap_dio_ops *dops, 342 - unsigned int dio_flags); 342 + unsigned int dio_flags, size_t done_before); 343 343 struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, 344 344 const struct iomap_ops *ops, const struct iomap_dio_ops *dops, 345 - unsigned int dio_flags); 345 + unsigned int dio_flags, size_t done_before); 346 346 ssize_t iomap_dio_complete(struct iomap_dio *dio); 347 347 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); 348 348