Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v5.16-rc2 (1209 lines, 36 kB)
// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>

MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

static void netfs_rreq_work(struct work_struct *);
static void __netfs_put_subrequest(struct netfs_read_subrequest *, bool);

static void netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				 bool was_async)
{
	if (refcount_dec_and_test(&subreq->usage))
		__netfs_put_subrequest(subreq, was_async);
}

static struct netfs_read_request *netfs_alloc_read_request(
	const struct netfs_read_request_ops *ops, void *netfs_priv,
	struct file *file)
{
	static atomic_t debug_ids;
	struct netfs_read_request *rreq;

	rreq = kzalloc(sizeof(struct netfs_read_request), GFP_KERNEL);
	if (rreq) {
		rreq->netfs_ops = ops;
		rreq->netfs_priv = netfs_priv;
		rreq->inode = file_inode(file);
		rreq->i_size = i_size_read(rreq->inode);
		rreq->debug_id = atomic_inc_return(&debug_ids);
		INIT_LIST_HEAD(&rreq->subrequests);
		INIT_WORK(&rreq->work, netfs_rreq_work);
		refcount_set(&rreq->usage, 1);
		__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
		ops->init_rreq(rreq, file);
		netfs_stat(&netfs_n_rh_rreq);
	}

	return rreq;
}

static void netfs_get_read_request(struct netfs_read_request *rreq)
{
	refcount_inc(&rreq->usage);
}

static void netfs_rreq_clear_subreqs(struct netfs_read_request *rreq,
				     bool was_async)
{
	struct netfs_read_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
					  struct netfs_read_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		netfs_put_subrequest(subreq, was_async);
	}
}

static void netfs_free_read_request(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_clear_subreqs(rreq, false);
	if (rreq->netfs_priv)
		rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
	kfree(rreq);
	netfs_stat_d(&netfs_n_rh_rreq);
}

static void netfs_put_read_request(struct netfs_read_request *rreq, bool was_async)
{
	if (refcount_dec_and_test(&rreq->usage)) {
		if (was_async) {
			rreq->work.func = netfs_free_read_request;
			if (!queue_work(system_unbound_wq, &rreq->work))
				BUG();
		} else {
			netfs_free_read_request(&rreq->work);
		}
	}
}

/*
 * Allocate and partially initialise an I/O request structure.
 */
static struct netfs_read_subrequest *netfs_alloc_subrequest(
	struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	subreq = kzalloc(sizeof(struct netfs_read_subrequest), GFP_KERNEL);
	if (subreq) {
		INIT_LIST_HEAD(&subreq->rreq_link);
		refcount_set(&subreq->usage, 2);
		subreq->rreq = rreq;
		netfs_get_read_request(rreq);
		netfs_stat(&netfs_n_rh_sreq);
	}

	return subreq;
}

static void netfs_get_read_subrequest(struct netfs_read_subrequest *subreq)
{
	refcount_inc(&subreq->usage);
}

static void __netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				   bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
	kfree(subreq);
	netfs_stat_d(&netfs_n_rh_sreq);
	netfs_put_read_request(rreq, was_async);
}

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_read_subrequest *subreq)
{
	struct iov_iter iter;

	iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len - subreq->transferred);
	iov_iter_zero(iov_iter_count(&iter), &iter);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;

	netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq,
				  bool seek_data)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;

	netfs_stat(&netfs_n_rh_read);
	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len - subreq->transferred);

	cres->ops->read(cres, subreq->start, &iter, seek_data,
			netfs_cache_read_terminated, subreq);
}

/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_zero);
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->pos + subreq->transferred to
 * subreq->pos + subreq->len - 1. It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);
	rreq->netfs_ops->issue_op(subreq);
}

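/*
 * Illustrative sketch (not from the original source): ->issue_op, named
 * above, is the one operation a netfs must supply.  A minimal implementation
 * might start an RPC covering subreq->start + subreq->transferred onwards and
 * then report the outcome with netfs_subreq_terminated().  The myfs_* names
 * below, including the myfs_fetch_data() RPC helper, are invented; only the
 * call pattern is the point.
 *
 *	static void myfs_req_issue_op(struct netfs_read_subrequest *subreq)
 *	{
 *		struct iov_iter iter;
 *		ssize_t ret;
 *
 *		iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
 *				subreq->start + subreq->transferred,
 *				subreq->len - subreq->transferred);
 *
 *		ret = myfs_fetch_data(subreq->rreq->inode, &iter,
 *				      subreq->start + subreq->transferred);
 *		netfs_subreq_terminated(subreq, ret, false);
 *	}
 */
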
/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_rreq_clear_subreqs(rreq, was_async);
	netfs_put_read_request(rreq, was_async);
}

/*
 * Deal with the completion of writing the data to the cache. We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
					  bool was_async)
{
	struct netfs_read_subrequest *subreq;
	struct folio *folio;
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
			/* We might have multiple writes from the same huge
			 * folio, but we mustn't unlock a folio more than once.
			 */
			if (have_unlocked && folio_index(folio) <= unlocked)
				continue;
			unlocked = folio_index(folio);
			folio_end_fscache(folio);
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;
	struct netfs_read_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_wr_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async);
}

/*
 * Perform any outstanding writes to the cache. We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_read_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_write);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
304 */ 305 atomic_inc(&rreq->nr_wr_ops); 306 307 list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { 308 if (!test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags)) { 309 list_del_init(&subreq->rreq_link); 310 netfs_put_subrequest(subreq, false); 311 } 312 } 313 314 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 315 /* Amalgamate adjacent writes */ 316 while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 317 next = list_next_entry(subreq, rreq_link); 318 if (next->start != subreq->start + subreq->len) 319 break; 320 subreq->len += next->len; 321 list_del_init(&next->rreq_link); 322 netfs_put_subrequest(next, false); 323 } 324 325 ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, 326 rreq->i_size); 327 if (ret < 0) { 328 trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); 329 trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); 330 continue; 331 } 332 333 iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages, 334 subreq->start, subreq->len); 335 336 atomic_inc(&rreq->nr_wr_ops); 337 netfs_stat(&netfs_n_rh_write); 338 netfs_get_read_subrequest(subreq); 339 trace_netfs_sreq(subreq, netfs_sreq_trace_write); 340 cres->ops->write(cres, subreq->start, &iter, 341 netfs_rreq_copy_terminated, subreq); 342 } 343 344 /* If we decrement nr_wr_ops to 0, the usage ref belongs to us. */ 345 if (atomic_dec_and_test(&rreq->nr_wr_ops)) 346 netfs_rreq_unmark_after_write(rreq, false); 347} 348 349static void netfs_rreq_write_to_cache_work(struct work_struct *work) 350{ 351 struct netfs_read_request *rreq = 352 container_of(work, struct netfs_read_request, work); 353 354 netfs_rreq_do_write_to_cache(rreq); 355} 356 357static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq, 358 bool was_async) 359{ 360 if (was_async) { 361 rreq->work.func = netfs_rreq_write_to_cache_work; 362 if (!queue_work(system_unbound_wq, &rreq->work)) 363 BUG(); 364 } else { 365 netfs_rreq_do_write_to_cache(rreq); 366 } 367} 368 369/* 370 * Unlock the folios in a read operation. We need to set PG_fscache on any 371 * folios we're going to write back before we unlock them. 372 */ 373static void netfs_rreq_unlock(struct netfs_read_request *rreq) 374{ 375 struct netfs_read_subrequest *subreq; 376 struct folio *folio; 377 unsigned int iopos, account = 0; 378 pgoff_t start_page = rreq->start / PAGE_SIZE; 379 pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; 380 bool subreq_failed = false; 381 382 XA_STATE(xas, &rreq->mapping->i_pages, start_page); 383 384 if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { 385 __clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); 386 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 387 __clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); 388 } 389 } 390 391 /* Walk through the pagecache and the I/O request lists simultaneously. 392 * We may have a mixture of cached and uncached sections and we only 393 * really want to write out the uncached sections. This is slightly 394 * complicated by the possibility that we might have huge pages with a 395 * mixture inside. 
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_read_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

	netfs_stat(&netfs_n_rh_short_read);
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

	netfs_get_read_subrequest(subreq);
	atomic_inc(&rreq->nr_rd_ops);
	if (subreq->source == NETFS_READ_FROM_CACHE)
		netfs_read_from_cache(rreq, subreq, true);
	else
		netfs_read_from_server(rreq, subreq);
}

/*
 * Resubmit any short or failed operations. Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	WARN_ON(in_interrupt());

	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_rd_ops);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
			netfs_stat(&netfs_n_rh_download_instead);
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
			netfs_get_read_subrequest(subreq);
			atomic_inc(&rreq->nr_rd_ops);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_READ, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_rd_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		return true;

	wake_up_var(&rreq->nr_rd_ops);
	return false;
}

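/*
 * Illustrative sketch (not from the original source): the ->is_still_valid
 * hook consulted below lets the netfs reject data that was read from the
 * cache if the server's copy may have changed in the meantime.  Assuming a
 * hypothetical per-request private structure that carries a data-version
 * number sampled at submission time, it could be as simple as:
 *
 *	static bool myfs_is_still_valid(struct netfs_read_request *rreq)
 *	{
 *		struct myfs_priv *priv = rreq->netfs_priv;
 *
 *		return priv->data_version == myfs_inode(rreq->inode)->data_version;
 *	}
 */
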
/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point. We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_read_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
	netfs_rreq_is_still_valid(rreq);

	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	netfs_rreq_unlock(rreq);

	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
		return netfs_rreq_write_to_cache(rreq, was_async);

	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_read_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code. The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_read_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;
	int u;

	_enter("[%u]{%llx,%lx},%zd",
	       subreq->debug_index, subreq->start, subreq->flags,
	       transferred_or_error);

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);

out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_rd_ops);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_rd_ops);

	netfs_put_subrequest(subreq, was_async);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
		netfs_stat(&netfs_n_rh_read_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
		netfs_stat(&netfs_n_rh_download_failed);
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);

static enum netfs_read_source netfs_cache_prepare_read(struct netfs_read_subrequest *subreq,
						       loff_t i_size)
{
	struct netfs_read_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}

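/*
 * Illustrative sketch (not from the original source): the ->clamp_length hook
 * used by the preparation code below lets the netfs shrink a proposed
 * subrequest to its own RPC size and alignment limits.  Assuming a
 * hypothetical MYFS_MAX_RSIZE per-mount limit, a trivial implementation might
 * be:
 *
 *	static bool myfs_clamp_length(struct netfs_read_subrequest *subreq)
 *	{
 *		subreq->len = min_t(size_t, subreq->len, MYFS_MAX_RSIZE);
 *		return true;
 *	}
 */
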
/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_read_source
netfs_rreq_prepare_read(struct netfs_read_request *rreq,
			struct netfs_read_subrequest *subreq)
{
	enum netfs_read_source source;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	source = netfs_cache_prepare_read(subreq, rreq->i_size);
	if (source == NETFS_INVALID_READ)
		goto out;

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries. If it shrinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}
	}

	if (WARN_ON(subreq->len == 0))
		source = NETFS_INVALID_READ;

out:
	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq,
				    unsigned int *_debug_index)
{
	struct netfs_read_subrequest *subreq;
	enum netfs_read_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index = (*_debug_index)++;
	subreq->start = rreq->start + rreq->submitted;
	subreq->len = rreq->len - rreq->submitted;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset. It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_rd_ops);

	rreq->submitted += subreq->len;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_from_cache(rreq, subreq, false);
		break;
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false);
	return false;
}

static void netfs_cache_expand_readahead(struct netfs_read_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_read_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory. It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl,
		     const struct netfs_read_request_ops *ops,
		     void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		goto cleanup;

	rreq = netfs_alloc_read_request(ops, netfs_priv, ractl->file);
	if (!rreq)
		goto cleanup;
	rreq->mapping = ractl->mapping;
	rreq->start = readahead_pos(ractl);
	rreq->len = readahead_length(ractl);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem. The locks will be dropped in netfs_rreq_unlock().
	 */
	while (readahead_folio(ractl))
		;

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		netfs_rreq_assess(rreq, false);
	return;

cleanup_free:
	netfs_put_read_request(rreq, false);
	return;
cleanup:
	if (netfs_priv)
		ops->cleanup(ractl->mapping, netfs_priv);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
 * netfs_readpage - Helper to manage a readpage request
 * @file: The file to read from
 * @folio: The folio to read
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory. It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file,
		   struct folio *folio,
		   const struct netfs_read_request_ops *ops,
		   void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx", folio_index(folio));

	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
	if (!rreq) {
		if (netfs_priv)
			ops->cleanup(folio_file_mapping(folio), netfs_priv);
		folio_unlock(folio);
		return -ENOMEM;
	}
	rreq->mapping = folio_file_mapping(folio);
	rreq->start = folio_file_pos(folio);
	rreq->len = folio_size(folio);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
			folio_unlock(folio);
			goto out;
		}
	}

	netfs_stat(&netfs_n_rh_readpage);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	netfs_get_read_request(rreq);

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_rd_ops incremented so that the ref always belongs to us, and
	 * the service code isn't punted off to a random thread pool to
	 * process.
	 */
	do {
		wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
		netfs_rreq_assess(rreq, false);
	} while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags));

	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage);
		ret = -EIO;
	}
out:
	netfs_put_read_request(rreq, false);
	return ret;
}
EXPORT_SYMBOL(netfs_readpage);

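/*
 * Illustrative sketch (not from the original source): a filesystem typically
 * wires netfs_readahead() and netfs_readpage() straight into its
 * address_space_operations, passing its netfs_read_request_ops table.  With
 * a hypothetical myfs_req_ops table, the glue might look like:
 *
 *	static void myfs_readahead(struct readahead_control *ractl)
 *	{
 *		netfs_readahead(ractl, &myfs_req_ops, NULL);
 *	}
 *
 *	static int myfs_readpage(struct file *file, struct page *page)
 *	{
 *		return netfs_readpage(file, page_folio(page), &myfs_req_ops, NULL);
 *	}
 *
 *	const struct address_space_operations myfs_aops = {
 *		.readpage	= myfs_readpage,
 *		.readahead	= myfs_readahead,
 *		...
 *	};
 */
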
/**
 * netfs_skip_folio_read - prep a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);

	/* Full folio write */
	if (offset == 0 && len >= folio_size(folio))
		return true;

	/* pos beyond last folio in the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together. If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be regrabbed; or return an error.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, unsigned int aop_flags,
		      struct folio **_folio, void **_fsdata,
		      const struct netfs_read_request_ops *ops,
		      void *netfs_priv)
{
	struct netfs_read_request *rreq;
	struct folio *folio;
	struct inode *inode = file_inode(file);
	unsigned int debug_index = 0, fgp_flags;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
	if (aop_flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;
	folio = __filemap_get_folio(mapping, index, fgp_flags,
				    mapping_gfp_mask(mapping));
	if (!folio)
		return -ENOMEM;

	if (ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			if (ret == -EAGAIN)
				goto retry;
			goto error;
		}
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!ops->is_cache_enabled(inode) &&
	    netfs_skip_folio_read(folio, pos, len)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	ret = -ENOMEM;
	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
	if (!rreq)
		goto error;
	rreq->mapping = folio_file_mapping(folio);
	rreq->start = folio_file_pos(folio);
	rreq->len = folio_size(folio);
	rreq->no_unlock_folio = folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	netfs_priv = NULL;

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);
	netfs_get_read_request(rreq);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_rd_ops incremented so that the ref always belongs to us, and
	 * the service code isn't punted off to a random thread pool to
	 * process.
	 */
	for (;;) {
		wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
		netfs_rreq_assess(rreq, false);
		if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
			break;
		cond_resched();
	}

	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin);
		ret = -EIO;
	}
	netfs_put_read_request(rreq, false);
	if (ret < 0)
		goto error;

have_folio:
	ret = folio_wait_fscache_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	if (netfs_priv)
		ops->cleanup(mapping, netfs_priv);
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_read_request(rreq, false);
error:
	folio_unlock(folio);
	folio_put(folio);
	if (netfs_priv)
		ops->cleanup(mapping, netfs_priv);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
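
/*
 * Illustrative sketch (not from the original source): netfs_write_begin()
 * above is normally called from a filesystem's ->write_begin method, which in
 * this kernel version still traffics in struct page.  Assuming the same
 * hypothetical myfs_req_ops table as before, the wrapper might look like:
 *
 *	static int myfs_write_begin(struct file *file, struct address_space *mapping,
 *				    loff_t pos, unsigned int len, unsigned int flags,
 *				    struct page **pagep, void **fsdata)
 *	{
 *		struct folio *folio;
 *		int ret;
 *
 *		ret = netfs_write_begin(file, mapping, pos, len, flags,
 *					&folio, fsdata, &myfs_req_ops, NULL);
 *		if (ret < 0)
 *			return ret;
 *		*pagep = &folio->page;
 *		return 0;
 *	}
 */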