Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iomap: add caller-provided callbacks for read and readahead

Add caller-provided callbacks for read and readahead so that the iomap
read path can be used generically, especially by filesystems that are
not block-based.

In particular, this:
* Modifies the read and readahead interface to take in a
struct iomap_read_folio_ctx that is publicly defined as:

struct iomap_read_folio_ctx {
const struct iomap_read_ops *ops;
struct folio *cur_folio;
struct readahead_control *rac;
void *read_ctx;
};

where struct iomap_read_ops is defined as:

struct iomap_read_ops {
int (*read_folio_range)(const struct iomap_iter *iter,
struct iomap_read_folio_ctx *ctx,
size_t len);
void (*submit_read)(struct iomap_read_folio_ctx *ctx);
};

read_folio_range() reads in the folio range and must be provided by the
caller. submit_read() is optional and is used for submitting any
pending read requests.

* Modifies existing filesystems that use iomap for read and readahead to
use the new API, through the new statically inlined helpers
iomap_bio_read_folio() and iomap_bio_readahead(). There is no change
in functionality for those filesystems.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

Joanne Koong and committed by
Christian Brauner
b2f35ac4 fb7a10ac

+149 -39
+44
Documentation/filesystems/iomap/operations.rst
··· 135 135 136 136 * ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``. 137 137 138 + ``struct iomap_read_ops`` 139 + -------------------------- 140 + 141 + .. code-block:: c 142 + 143 + struct iomap_read_ops { 144 + int (*read_folio_range)(const struct iomap_iter *iter, 145 + struct iomap_read_folio_ctx *ctx, size_t len); 146 + void (*submit_read)(struct iomap_read_folio_ctx *ctx); 147 + }; 148 + 149 + iomap calls these functions: 150 + 151 + - ``read_folio_range``: Called to read in the range. This must be provided 152 + by the caller. The caller is responsible for calling 153 + iomap_finish_folio_read() after reading in the folio range. This should be 154 + done even if an error is encountered during the read. This returns 0 on 155 + success or a negative error on failure. 156 + 157 + - ``submit_read``: Submit any pending read requests. This function is 158 + optional. 159 + 138 160 Internal per-Folio State 139 161 ------------------------ 140 162 ··· 203 181 The ``flags`` argument to ``->iomap_begin`` will be set to zero. 204 182 The pagecache takes whatever locks it needs before calling the 205 183 filesystem. 184 + 185 + Both ``iomap_readahead`` and ``iomap_read_folio`` pass in a ``struct 186 + iomap_read_folio_ctx``: 187 + 188 + .. code-block:: c 189 + 190 + struct iomap_read_folio_ctx { 191 + const struct iomap_read_ops *ops; 192 + struct folio *cur_folio; 193 + struct readahead_control *rac; 194 + void *read_ctx; 195 + }; 196 + 197 + ``iomap_readahead`` must set: 198 + * ``ops->read_folio_range()`` and ``rac`` 199 + 200 + ``iomap_read_folio`` must set: 201 + * ``ops->read_folio_range()`` and ``cur_folio`` 202 + 203 + ``ops->submit_read()`` and ``read_ctx`` are optional. ``read_ctx`` is used to 204 + pass in any custom data the caller needs accessible in the ops callbacks for 205 + fulfilling reads. 206 206 207 207 Buffered Writes 208 208 ---------------
+3 -2
block/fops.c
··· 540 540 #else /* CONFIG_BUFFER_HEAD */ 541 541 static int blkdev_read_folio(struct file *file, struct folio *folio) 542 542 { 543 - return iomap_read_folio(folio, &blkdev_iomap_ops); 543 + iomap_bio_read_folio(folio, &blkdev_iomap_ops); 544 + return 0; 544 545 } 545 546 546 547 static void blkdev_readahead(struct readahead_control *rac) 547 548 { 548 - iomap_readahead(rac, &blkdev_iomap_ops); 549 + iomap_bio_readahead(rac, &blkdev_iomap_ops); 549 550 } 550 551 551 552 static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
+3 -2
fs/erofs/data.c
··· 371 371 { 372 372 trace_erofs_read_folio(folio, true); 373 373 374 - return iomap_read_folio(folio, &erofs_iomap_ops); 374 + iomap_bio_read_folio(folio, &erofs_iomap_ops); 375 + return 0; 375 376 } 376 377 377 378 static void erofs_readahead(struct readahead_control *rac) ··· 380 379 trace_erofs_readahead(rac->mapping->host, readahead_index(rac), 381 380 readahead_count(rac), true); 382 381 383 - return iomap_readahead(rac, &erofs_iomap_ops); 382 + iomap_bio_readahead(rac, &erofs_iomap_ops); 384 383 } 385 384 386 385 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
+3 -3
fs/gfs2/aops.c
··· 424 424 struct inode *inode = folio->mapping->host; 425 425 struct gfs2_inode *ip = GFS2_I(inode); 426 426 struct gfs2_sbd *sdp = GFS2_SB(inode); 427 - int error; 427 + int error = 0; 428 428 429 429 if (!gfs2_is_jdata(ip) || 430 430 (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { 431 - error = iomap_read_folio(folio, &gfs2_iomap_ops); 431 + iomap_bio_read_folio(folio, &gfs2_iomap_ops); 432 432 } else if (gfs2_is_stuffed(ip)) { 433 433 error = stuffed_read_folio(ip, folio); 434 434 } else { ··· 503 503 else if (gfs2_is_jdata(ip)) 504 504 mpage_readahead(rac, gfs2_block_map); 505 505 else 506 - iomap_readahead(rac, &gfs2_iomap_ops); 506 + iomap_bio_readahead(rac, &gfs2_iomap_ops); 507 507 } 508 508 509 509 /**
+29 -26
fs/iomap/buffered-io.c
··· 328 328 } 329 329 330 330 #ifdef CONFIG_BLOCK 331 - static void iomap_finish_folio_read(struct folio *folio, size_t off, 332 - size_t len, int error) 331 + void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, 332 + int error) 333 333 { 334 334 struct iomap_folio_state *ifs = folio->private; 335 335 bool uptodate = !error; ··· 349 349 if (finished) 350 350 folio_end_read(folio, uptodate); 351 351 } 352 + EXPORT_SYMBOL_GPL(iomap_finish_folio_read); 352 353 353 354 static void iomap_read_end_io(struct bio *bio) 354 355 { ··· 361 360 bio_put(bio); 362 361 } 363 362 364 - struct iomap_read_folio_ctx { 365 - struct folio *cur_folio; 366 - void *read_ctx; 367 - struct readahead_control *rac; 368 - }; 369 - 370 363 static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) 371 364 { 372 365 struct bio *bio = ctx->read_ctx; ··· 369 374 submit_bio(bio); 370 375 } 371 376 372 - static void iomap_bio_read_folio_range(const struct iomap_iter *iter, 377 + static int iomap_bio_read_folio_range(const struct iomap_iter *iter, 373 378 struct iomap_read_folio_ctx *ctx, size_t plen) 374 379 { 375 380 struct folio *folio = ctx->cur_folio; ··· 407 412 bio_add_folio_nofail(bio, folio, plen, poff); 408 413 ctx->read_ctx = bio; 409 414 } 415 + return 0; 410 416 } 417 + 418 + const struct iomap_read_ops iomap_bio_read_ops = { 419 + .read_folio_range = iomap_bio_read_folio_range, 420 + .submit_read = iomap_bio_submit_read, 421 + }; 422 + EXPORT_SYMBOL_GPL(iomap_bio_read_ops); 411 423 412 424 static void iomap_read_init(struct folio *folio) 413 425 { ··· 546 544 if (!*bytes_pending) 547 545 iomap_read_init(folio); 548 546 *bytes_pending += plen; 549 - iomap_bio_read_folio_range(iter, ctx, plen); 547 + ret = ctx->ops->read_folio_range(iter, ctx, plen); 548 + if (ret) 549 + return ret; 550 550 } 551 551 552 552 ret = iomap_iter_advance(iter, plen); ··· 560 556 return 0; 561 557 } 562 558 563 - int iomap_read_folio(struct folio *folio, const struct iomap_ops 
*ops) 559 + int iomap_read_folio(const struct iomap_ops *ops, 560 + struct iomap_read_folio_ctx *ctx) 564 561 { 562 + struct folio *folio = ctx->cur_folio; 565 563 struct iomap_iter iter = { 566 564 .inode = folio->mapping->host, 567 565 .pos = folio_pos(folio), 568 566 .len = folio_size(folio), 569 - }; 570 - struct iomap_read_folio_ctx ctx = { 571 - .cur_folio = folio, 572 567 }; 573 568 size_t bytes_pending = 0; 574 569 int ret; ··· 575 572 trace_iomap_readpage(iter.inode, 1); 576 573 577 574 while ((ret = iomap_iter(&iter, ops)) > 0) 578 - iter.status = iomap_read_folio_iter(&iter, &ctx, 579 - &bytes_pending); 575 + iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_pending); 580 576 581 - iomap_bio_submit_read(&ctx); 577 + if (ctx->ops->submit_read) 578 + ctx->ops->submit_read(ctx); 582 579 583 580 iomap_read_end(folio, bytes_pending); 584 581 ··· 618 615 619 616 /** 620 617 * iomap_readahead - Attempt to read pages from a file. 621 - * @rac: Describes the pages to be read. 622 618 * @ops: The operations vector for the filesystem. 619 + * @ctx: The ctx used for issuing readahead. 623 620 * 624 621 * This function is for filesystems to call to implement their readahead 625 622 * address_space operation. ··· 631 628 * function is called with memalloc_nofs set, so allocations will not cause 632 629 * the filesystem to be reentered. 
633 630 */ 634 - void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) 631 + void iomap_readahead(const struct iomap_ops *ops, 632 + struct iomap_read_folio_ctx *ctx) 635 633 { 634 + struct readahead_control *rac = ctx->rac; 636 635 struct iomap_iter iter = { 637 636 .inode = rac->mapping->host, 638 637 .pos = readahead_pos(rac), 639 638 .len = readahead_length(rac), 640 - }; 641 - struct iomap_read_folio_ctx ctx = { 642 - .rac = rac, 643 639 }; 644 640 size_t cur_bytes_pending; 645 641 646 642 trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); 647 643 648 644 while (iomap_iter(&iter, ops) > 0) 649 - iter.status = iomap_readahead_iter(&iter, &ctx, 645 + iter.status = iomap_readahead_iter(&iter, ctx, 650 646 &cur_bytes_pending); 651 647 652 - iomap_bio_submit_read(&ctx); 648 + if (ctx->ops->submit_read) 649 + ctx->ops->submit_read(ctx); 653 650 654 - if (ctx.cur_folio) 655 - iomap_read_end(ctx.cur_folio, cur_bytes_pending); 651 + if (ctx->cur_folio) 652 + iomap_read_end(ctx->cur_folio, cur_bytes_pending); 656 653 } 657 654 EXPORT_SYMBOL_GPL(iomap_readahead); 658 655
+3 -2
fs/xfs/xfs_aops.c
··· 742 742 struct file *unused, 743 743 struct folio *folio) 744 744 { 745 - return iomap_read_folio(folio, &xfs_read_iomap_ops); 745 + iomap_bio_read_folio(folio, &xfs_read_iomap_ops); 746 + return 0; 746 747 } 747 748 748 749 STATIC void 749 750 xfs_vm_readahead( 750 751 struct readahead_control *rac) 751 752 { 752 - iomap_readahead(rac, &xfs_read_iomap_ops); 753 + iomap_bio_readahead(rac, &xfs_read_iomap_ops); 753 754 } 754 755 755 756 static int
+3 -2
fs/zonefs/file.c
··· 112 112 113 113 static int zonefs_read_folio(struct file *unused, struct folio *folio) 114 114 { 115 - return iomap_read_folio(folio, &zonefs_read_iomap_ops); 115 + iomap_bio_read_folio(folio, &zonefs_read_iomap_ops); 116 + return 0; 116 117 } 117 118 118 119 static void zonefs_readahead(struct readahead_control *rac) 119 120 { 120 - iomap_readahead(rac, &zonefs_read_iomap_ops); 121 + iomap_bio_readahead(rac, &zonefs_read_iomap_ops); 121 122 } 122 123 123 124 /*
+61 -2
include/linux/iomap.h
··· 16 16 struct iomap_iter; 17 17 struct iomap_dio; 18 18 struct iomap_writepage_ctx; 19 + struct iomap_read_folio_ctx; 19 20 struct iov_iter; 20 21 struct kiocb; 21 22 struct page; ··· 338 337 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, 339 338 const struct iomap_ops *ops, 340 339 const struct iomap_write_ops *write_ops, void *private); 341 - int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); 342 - void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); 340 + int iomap_read_folio(const struct iomap_ops *ops, 341 + struct iomap_read_folio_ctx *ctx); 342 + void iomap_readahead(const struct iomap_ops *ops, 343 + struct iomap_read_folio_ctx *ctx); 343 344 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); 344 345 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); 345 346 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); ··· 468 465 loff_t pos, loff_t end_pos, unsigned int dirty_len); 469 466 int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); 470 467 468 + void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, 469 + int error); 471 470 void iomap_start_folio_write(struct inode *inode, struct folio *folio, 472 471 size_t len); 473 472 void iomap_finish_folio_write(struct inode *inode, struct folio *folio, ··· 477 472 478 473 int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio); 479 474 int iomap_writepages(struct iomap_writepage_ctx *wpc); 475 + 476 + struct iomap_read_folio_ctx { 477 + const struct iomap_read_ops *ops; 478 + struct folio *cur_folio; 479 + struct readahead_control *rac; 480 + void *read_ctx; 481 + }; 482 + 483 + struct iomap_read_ops { 484 + /* 485 + * Read in a folio range. 486 + * 487 + * The caller is responsible for calling iomap_finish_folio_read() after 488 + * reading in the folio range. 
This should be done even if an error is 489 + * encountered during the read. 490 + * 491 + * Returns 0 on success or a negative error on failure. 492 + */ 493 + int (*read_folio_range)(const struct iomap_iter *iter, 494 + struct iomap_read_folio_ctx *ctx, size_t len); 495 + 496 + /* 497 + * Submit any pending read requests. 498 + * 499 + * This is optional. 500 + */ 501 + void (*submit_read)(struct iomap_read_folio_ctx *ctx); 502 + }; 480 503 481 504 /* 482 505 * Flags for direct I/O ->end_io: ··· 570 537 #endif /* CONFIG_SWAP */ 571 538 572 539 extern struct bio_set iomap_ioend_bioset; 540 + 541 + #ifdef CONFIG_BLOCK 542 + extern const struct iomap_read_ops iomap_bio_read_ops; 543 + 544 + static inline void iomap_bio_read_folio(struct folio *folio, 545 + const struct iomap_ops *ops) 546 + { 547 + struct iomap_read_folio_ctx ctx = { 548 + .ops = &iomap_bio_read_ops, 549 + .cur_folio = folio, 550 + }; 551 + 552 + iomap_read_folio(ops, &ctx); 553 + } 554 + 555 + static inline void iomap_bio_readahead(struct readahead_control *rac, 556 + const struct iomap_ops *ops) 557 + { 558 + struct iomap_read_folio_ctx ctx = { 559 + .ops = &iomap_bio_read_ops, 560 + .rac = rac, 561 + }; 562 + 563 + iomap_readahead(ops, &ctx); 564 + } 565 + #endif /* CONFIG_BLOCK */ 573 566 574 567 #endif /* LINUX_IOMAP_H */