Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iomap: track pending read bytes more optimally

Instead of incrementing read_bytes_pending for every folio range read in
(which requires acquiring the spinlock each time), set read_bytes_pending
to the folio size when the first range is asynchronously read in, keep
track of how many bytes in total are asynchronously read in, and adjust
read_bytes_pending accordingly after issuing requests to read in all the
necessary ranges.

iomap_read_folio_ctx->cur_folio_in_bio can be removed since a non-zero
value for pending bytes necessarily indicates the folio is in the bio.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

Authored by Joanne Koong and committed by Christian Brauner.
d43558ae 87a13819

+102 -21
+102 -21
fs/iomap/buffered-io.c
··· 362 362 363 363 struct iomap_read_folio_ctx { 364 364 struct folio *cur_folio; 365 - bool cur_folio_in_bio; 366 365 void *read_ctx; 367 366 struct readahead_control *rac; 368 367 }; ··· 379 380 { 380 381 struct folio *folio = ctx->cur_folio; 381 382 const struct iomap *iomap = &iter->iomap; 382 - struct iomap_folio_state *ifs = folio->private; 383 383 size_t poff = offset_in_folio(folio, pos); 384 384 loff_t length = iomap_length(iter); 385 385 sector_t sector; 386 386 struct bio *bio = ctx->read_ctx; 387 - 388 - ctx->cur_folio_in_bio = true; 389 - if (ifs) { 390 - spin_lock_irq(&ifs->state_lock); 391 - ifs->read_bytes_pending += plen; 392 - spin_unlock_irq(&ifs->state_lock); 393 - } 394 387 395 388 sector = iomap_sector(iomap, pos); 396 389 if (!bio || bio_end_sector(bio) != sector || ··· 413 422 } 414 423 } 415 424 425 + static void iomap_read_init(struct folio *folio) 426 + { 427 + struct iomap_folio_state *ifs = folio->private; 428 + 429 + if (ifs) { 430 + size_t len = folio_size(folio); 431 + 432 + /* 433 + * ifs->read_bytes_pending is used to track how many bytes are 434 + * read in asynchronously by the IO helper. We need to track 435 + * this so that we can know when the IO helper has finished 436 + * reading in all the necessary ranges of the folio and can end 437 + * the read. 438 + * 439 + * Increase ->read_bytes_pending by the folio size to start, and 440 + * add a +1 bias. We'll subtract the bias and any uptodate / 441 + * zeroed ranges that did not require IO in iomap_read_end() 442 + * after we're done processing the folio. 443 + * 444 + * We do this because otherwise, we would have to increment 445 + * ifs->read_bytes_pending every time a range in the folio needs 446 + * to be read in, which can get expensive since the spinlock 447 + * needs to be held whenever modifying ifs->read_bytes_pending. 
448 + * 449 + * We add the bias to ensure the read has not been ended on the 450 + * folio when iomap_read_end() is called, even if the IO helper 451 + * has already finished reading in the entire folio. 452 + */ 453 + spin_lock_irq(&ifs->state_lock); 454 + ifs->read_bytes_pending += len + 1; 455 + spin_unlock_irq(&ifs->state_lock); 456 + } 457 + } 458 + 459 + /* 460 + * This ends IO if no bytes were submitted to an IO helper. 461 + * 462 + * Otherwise, this calibrates ifs->read_bytes_pending to represent only the 463 + * submitted bytes (see comment in iomap_read_init()). If all bytes submitted 464 + * have already been completed by the IO helper, then this will end the read. 465 + * Else the IO helper will end the read after all submitted ranges have been 466 + * read. 467 + */ 468 + static void iomap_read_end(struct folio *folio, size_t bytes_pending) 469 + { 470 + struct iomap_folio_state *ifs; 471 + 472 + /* 473 + * If there are no bytes pending, this means we are responsible for 474 + * unlocking the folio here, since no IO helper has taken ownership of 475 + * it. 476 + */ 477 + if (!bytes_pending) { 478 + folio_unlock(folio); 479 + return; 480 + } 481 + 482 + ifs = folio->private; 483 + if (ifs) { 484 + bool end_read, uptodate; 485 + /* 486 + * Subtract any bytes that were initially accounted to 487 + * read_bytes_pending but skipped for IO. 488 + * The +1 accounts for the bias we added in iomap_read_init(). 489 + */ 490 + size_t bytes_accounted = folio_size(folio) + 1 - 491 + bytes_pending; 492 + 493 + spin_lock_irq(&ifs->state_lock); 494 + ifs->read_bytes_pending -= bytes_accounted; 495 + /* 496 + * If !ifs->read_bytes_pending, this means all pending reads 497 + * by the IO helper have already completed, which means we need 498 + * to end the folio read here. If ifs->read_bytes_pending != 0, 499 + * the IO helper will end the folio read. 
500 + */ 501 + end_read = !ifs->read_bytes_pending; 502 + if (end_read) 503 + uptodate = ifs_is_fully_uptodate(folio, ifs); 504 + spin_unlock_irq(&ifs->state_lock); 505 + if (end_read) 506 + folio_end_read(folio, uptodate); 507 + } 508 + } 509 + 416 510 static int iomap_read_folio_iter(struct iomap_iter *iter, 417 - struct iomap_read_folio_ctx *ctx) 511 + struct iomap_read_folio_ctx *ctx, size_t *bytes_pending) 418 512 { 419 513 const struct iomap *iomap = &iter->iomap; 420 514 loff_t pos = iter->pos; ··· 536 460 folio_zero_range(folio, poff, plen); 537 461 iomap_set_range_uptodate(folio, poff, plen); 538 462 } else { 463 + if (!*bytes_pending) 464 + iomap_read_init(folio); 465 + *bytes_pending += plen; 539 466 iomap_bio_read_folio_range(iter, ctx, pos, plen); 540 467 } 541 468 ··· 561 482 struct iomap_read_folio_ctx ctx = { 562 483 .cur_folio = folio, 563 484 }; 485 + size_t bytes_pending = 0; 564 486 int ret; 565 487 566 488 trace_iomap_readpage(iter.inode, 1); 567 489 568 490 while ((ret = iomap_iter(&iter, ops)) > 0) 569 - iter.status = iomap_read_folio_iter(&iter, &ctx); 491 + iter.status = iomap_read_folio_iter(&iter, &ctx, 492 + &bytes_pending); 570 493 571 494 iomap_bio_submit_read(&ctx); 572 495 573 - if (!ctx.cur_folio_in_bio) 574 - folio_unlock(folio); 496 + iomap_read_end(folio, bytes_pending); 575 497 576 498 /* 577 499 * Just like mpage_readahead and block_read_full_folio, we always ··· 584 504 EXPORT_SYMBOL_GPL(iomap_read_folio); 585 505 586 506 static int iomap_readahead_iter(struct iomap_iter *iter, 587 - struct iomap_read_folio_ctx *ctx) 507 + struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending) 588 508 { 589 509 int ret; 590 510 591 511 while (iomap_length(iter)) { 592 512 if (ctx->cur_folio && 593 513 offset_in_folio(ctx->cur_folio, iter->pos) == 0) { 594 - if (!ctx->cur_folio_in_bio) 595 - folio_unlock(ctx->cur_folio); 514 + iomap_read_end(ctx->cur_folio, *cur_bytes_pending); 596 515 ctx->cur_folio = NULL; 597 516 } 598 517 if 
(!ctx->cur_folio) { 599 518 ctx->cur_folio = readahead_folio(ctx->rac); 600 519 if (WARN_ON_ONCE(!ctx->cur_folio)) 601 520 return -EINVAL; 602 - ctx->cur_folio_in_bio = false; 521 + *cur_bytes_pending = 0; 603 522 } 604 - ret = iomap_read_folio_iter(iter, ctx); 523 + ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending); 605 524 if (ret) 606 525 return ret; 607 526 } ··· 633 554 struct iomap_read_folio_ctx ctx = { 634 555 .rac = rac, 635 556 }; 557 + size_t cur_bytes_pending; 636 558 637 559 trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); 638 560 639 561 while (iomap_iter(&iter, ops) > 0) 640 - iter.status = iomap_readahead_iter(&iter, &ctx); 562 + iter.status = iomap_readahead_iter(&iter, &ctx, 563 + &cur_bytes_pending); 641 564 642 565 iomap_bio_submit_read(&ctx); 643 566 644 - if (ctx.cur_folio && !ctx.cur_folio_in_bio) 645 - folio_unlock(ctx.cur_folio); 567 + if (ctx.cur_folio) 568 + iomap_read_end(ctx.cur_folio, cur_bytes_pending); 646 569 } 647 570 EXPORT_SYMBOL_GPL(iomap_readahead); 648 571