Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfs: Speed up buffered reading

Improve the efficiency of buffered reads in a number of ways:

(1) Overhaul the algorithm in general so that it's a lot more compact and
split the read submission code between buffered and unbuffered
versions. The unbuffered version can be vastly simplified.

(2) Read-result collection is handed off to a work queue rather than being
done in the I/O thread. Multiple subrequests can be processed
simultaneously.

(3) When a subrequest is collected, any folios it fully spans are
collected and "spare" data on either side is donated to either the
previous or the next subrequest in the sequence.

Notes:

(*) Readahead expansion massively slows down fio, presumably because it
causes a load of extra allocations, both folio and xarray, up front
before RPC requests can be transmitted.

(*) RDMA with cifs does appear to work, both with SIW and RXE.

(*) PG_private_2-based reading and copy-to-cache is split out into its own
file and altered to use folio_queue. Note that the copy to the cache
now creates a new write transaction against the cache and adds the
folios to be copied into it. This allows it to use part of the
writeback I/O code.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

David Howells and committed by
Christian Brauner
ee4cdf7b 2e45b922

+2063 -475
+8 -3
fs/9p/vfs_addr.c
··· 68 68 { 69 69 struct netfs_io_request *rreq = subreq->rreq; 70 70 struct p9_fid *fid = rreq->netfs_priv; 71 + unsigned long long pos = subreq->start + subreq->transferred; 71 72 int total, err; 72 73 73 - total = p9_client_read(fid, subreq->start + subreq->transferred, 74 - &subreq->io_iter, &err); 74 + total = p9_client_read(fid, pos, &subreq->io_iter, &err); 75 75 76 76 /* if we just extended the file size, any portion not in 77 77 * cache won't be on server and is zeroes */ 78 78 if (subreq->rreq->origin != NETFS_DIO_READ) 79 79 __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 80 + if (pos + total >= i_size_read(rreq->inode)) 81 + __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); 80 82 81 - netfs_subreq_terminated(subreq, err ?: total, false); 83 + if (!err) 84 + subreq->transferred += total; 85 + 86 + netfs_read_subreq_terminated(subreq, err, false); 82 87 } 83 88 84 89 /**
+15 -6
fs/afs/file.c
··· 16 16 #include <linux/mm.h> 17 17 #include <linux/swap.h> 18 18 #include <linux/netfs.h> 19 + #include <trace/events/netfs.h> 19 20 #include "internal.h" 20 21 21 22 static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); ··· 243 242 244 243 req->error = error; 245 244 if (subreq) { 246 - if (subreq->rreq->origin != NETFS_DIO_READ) 247 - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 248 - netfs_subreq_terminated(subreq, error ?: req->actual_len, false); 245 + subreq->rreq->i_size = req->file_size; 246 + if (req->pos + req->actual_len >= req->file_size) 247 + __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); 248 + netfs_read_subreq_terminated(subreq, error, false); 249 249 req->subreq = NULL; 250 250 } else if (req->done) { 251 251 req->done(req); ··· 264 262 afs_fetch_data_notify(op); 265 263 } 266 264 265 + static void afs_fetch_data_aborted(struct afs_operation *op) 266 + { 267 + afs_check_for_remote_deletion(op); 268 + afs_fetch_data_notify(op); 269 + } 270 + 267 271 static void afs_fetch_data_put(struct afs_operation *op) 268 272 { 269 273 op->fetch.req->error = afs_op_error(op); ··· 280 272 .issue_afs_rpc = afs_fs_fetch_data, 281 273 .issue_yfs_rpc = yfs_fs_fetch_data, 282 274 .success = afs_fetch_data_success, 283 - .aborted = afs_check_for_remote_deletion, 275 + .aborted = afs_fetch_data_aborted, 284 276 .failed = afs_fetch_data_notify, 285 277 .put = afs_fetch_data_put, 286 278 }; ··· 302 294 op = afs_alloc_operation(req->key, vnode->volume); 303 295 if (IS_ERR(op)) { 304 296 if (req->subreq) 305 - netfs_subreq_terminated(req->subreq, PTR_ERR(op), false); 297 + netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false); 306 298 return PTR_ERR(op); 307 299 } 308 300 ··· 321 313 322 314 fsreq = afs_alloc_read(GFP_NOFS); 323 315 if (!fsreq) 324 - return netfs_subreq_terminated(subreq, -ENOMEM, false); 316 + return netfs_read_subreq_terminated(subreq, -ENOMEM, false); 325 317 326 318 fsreq->subreq = subreq; 327 319 fsreq->pos = 
subreq->start + subreq->transferred; ··· 330 322 fsreq->vnode = vnode; 331 323 fsreq->iter = &subreq->io_iter; 332 324 325 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 333 326 afs_fetch_data(fsreq->vnode, fsreq); 334 327 afs_put_read(fsreq); 335 328 }
+7 -2
fs/afs/fsclient.c
··· 304 304 struct afs_vnode_param *vp = &op->file[0]; 305 305 struct afs_read *req = op->fetch.req; 306 306 const __be32 *bp; 307 + size_t count_before; 307 308 int ret; 308 309 309 310 _enter("{%u,%zu,%zu/%llu}", ··· 346 345 347 346 /* extract the returned data */ 348 347 case 2: 349 - _debug("extract data %zu/%llu", 350 - iov_iter_count(call->iter), req->actual_len); 348 + count_before = call->iov_len; 349 + _debug("extract data %zu/%llu", count_before, req->actual_len); 351 350 352 351 ret = afs_extract_data(call, true); 352 + if (req->subreq) { 353 + req->subreq->transferred += count_before - call->iov_len; 354 + netfs_read_subreq_progress(req->subreq, false); 355 + } 353 356 if (ret < 0) 354 357 return ret; 355 358
+7 -2
fs/afs/yfsclient.c
··· 355 355 struct afs_vnode_param *vp = &op->file[0]; 356 356 struct afs_read *req = op->fetch.req; 357 357 const __be32 *bp; 358 + size_t count_before; 358 359 int ret; 359 360 360 361 _enter("{%u,%zu, %zu/%llu}", ··· 392 391 393 392 /* extract the returned data */ 394 393 case 2: 395 - _debug("extract data %zu/%llu", 396 - iov_iter_count(call->iter), req->actual_len); 394 + count_before = call->iov_len; 395 + _debug("extract data %zu/%llu", count_before, req->actual_len); 397 396 398 397 ret = afs_extract_data(call, true); 398 + if (req->subreq) { 399 + req->subreq->transferred += count_before - call->iov_len; 400 + netfs_read_subreq_progress(req->subreq, false); 401 + } 399 402 if (ret < 0) 400 403 return ret; 401 404
+46 -30
fs/ceph/addr.c
··· 13 13 #include <linux/iversion.h> 14 14 #include <linux/ktime.h> 15 15 #include <linux/netfs.h> 16 + #include <trace/events/netfs.h> 16 17 17 18 #include "super.h" 18 19 #include "mds_client.h" ··· 206 205 } 207 206 } 208 207 209 - static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) 210 - { 211 - struct inode *inode = subreq->rreq->inode; 212 - struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); 213 - struct ceph_inode_info *ci = ceph_inode(inode); 214 - u64 objno, objoff; 215 - u32 xlen; 216 - 217 - /* Truncate the extent at the end of the current block */ 218 - ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len, 219 - &objno, &objoff, &xlen); 220 - subreq->len = min(xlen, fsc->mount_options->rsize); 221 - return true; 222 - } 223 - 224 208 static void finish_netfs_read(struct ceph_osd_request *req) 225 209 { 226 210 struct inode *inode = req->r_inode; ··· 250 264 calc_pages_for(osd_data->alignment, 251 265 osd_data->length), false); 252 266 } 253 - netfs_subreq_terminated(subreq, err, false); 267 + if (err > 0) { 268 + subreq->transferred = err; 269 + err = 0; 270 + } 271 + trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress); 272 + netfs_read_subreq_terminated(subreq, err, false); 254 273 iput(req->r_inode); 255 274 ceph_dec_osd_stopping_blocker(fsc->mdsc); 256 275 } ··· 269 278 struct ceph_mds_request *req; 270 279 struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); 271 280 struct ceph_inode_info *ci = ceph_inode(inode); 272 - struct iov_iter iter; 273 281 ssize_t err = 0; 274 282 size_t len; 275 283 int mode; ··· 291 301 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); 292 302 req->r_num_caps = 2; 293 303 304 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 294 305 err = ceph_mdsc_do_request(mdsc, NULL, req); 295 306 if (err < 0) 296 307 goto out; ··· 305 314 } 306 315 307 316 len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); 308 - iov_iter_xarray(&iter, 
ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); 309 - err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter); 310 - if (err == 0) 317 + err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter); 318 + if (err == 0) { 311 319 err = -EFAULT; 320 + } else { 321 + subreq->transferred += err; 322 + err = 0; 323 + } 312 324 313 325 ceph_mdsc_put_request(req); 314 326 out: 315 - netfs_subreq_terminated(subreq, err, false); 327 + netfs_read_subreq_terminated(subreq, err, false); 316 328 return true; 329 + } 330 + 331 + static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq) 332 + { 333 + struct netfs_io_request *rreq = subreq->rreq; 334 + struct inode *inode = rreq->inode; 335 + struct ceph_inode_info *ci = ceph_inode(inode); 336 + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); 337 + u64 objno, objoff; 338 + u32 xlen; 339 + 340 + /* Truncate the extent at the end of the current block */ 341 + ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len, 342 + &objno, &objoff, &xlen); 343 + rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize); 344 + return 0; 317 345 } 318 346 319 347 static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ··· 344 334 struct ceph_client *cl = fsc->client; 345 335 struct ceph_osd_request *req = NULL; 346 336 struct ceph_vino vino = ceph_vino(inode); 347 - struct iov_iter iter; 348 - int err = 0; 349 - u64 len = subreq->len; 337 + int err; 338 + u64 len; 350 339 bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); 351 340 u64 off = subreq->start; 352 341 int extent_cnt; ··· 358 349 if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) 359 350 return; 360 351 352 + // TODO: This rounding here is slightly dodgy. It *should* work, for 353 + // now, as the cache only deals in blocks that are a multiple of 354 + // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. 
What needs to 355 + // happen is for the fscrypt driving to be moved into netfslib and the 356 + // data in the cache also to be stored encrypted. 357 + len = subreq->len; 361 358 ceph_fscrypt_adjust_off_and_len(inode, &off, &len); 362 359 363 360 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, ··· 386 371 doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", 387 372 ceph_vinop(inode), subreq->start, subreq->len, len); 388 373 389 - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); 390 - 391 374 /* 392 375 * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for 393 376 * encrypted inodes. We'd need infrastructure that handles an iov_iter ··· 397 384 struct page **pages; 398 385 size_t page_off; 399 386 400 - err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); 387 + err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off); 401 388 if (err < 0) { 402 389 doutc(cl, "%llx.%llx failed to allocate pages, %d\n", 403 390 ceph_vinop(inode), err); ··· 412 399 osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, 413 400 false); 414 401 } else { 415 - osd_req_op_extent_osd_iter(req, 0, &iter); 402 + osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter); 416 403 } 417 404 if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { 418 405 err = -EIO; ··· 423 410 req->r_inode = inode; 424 411 ihold(inode); 425 412 413 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 426 414 ceph_osdc_start_request(req->r_osdc, req); 427 415 out: 428 416 ceph_osdc_put_request(req); 429 417 if (err) 430 - netfs_subreq_terminated(subreq, err, false); 418 + netfs_read_subreq_terminated(subreq, err, false); 431 419 doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); 432 420 } 433 421 434 422 static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) 435 423 { 436 424 struct inode *inode = rreq->inode; 425 + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); 437 426 
struct ceph_client *cl = ceph_inode_to_client(inode); 438 427 int got = 0, want = CEPH_CAP_FILE_CACHE; 439 428 struct ceph_netfs_request_data *priv; ··· 487 472 488 473 priv->caps = got; 489 474 rreq->netfs_priv = priv; 475 + rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize; 490 476 491 477 out: 492 478 if (ret < 0) ··· 512 496 const struct netfs_request_ops ceph_netfs_ops = { 513 497 .init_request = ceph_init_request, 514 498 .free_request = ceph_netfs_free_request, 499 + .prepare_read = ceph_netfs_prepare_read, 515 500 .issue_read = ceph_netfs_issue_read, 516 501 .expand_readahead = ceph_netfs_expand_readahead, 517 - .clamp_length = ceph_netfs_clamp_length, 518 502 .check_write_begin = ceph_netfs_check_write_begin, 519 503 }; 520 504
+3 -1
fs/netfs/Makefile
··· 5 5 buffered_write.o \ 6 6 direct_read.o \ 7 7 direct_write.o \ 8 - io.o \ 9 8 iterator.o \ 10 9 locking.o \ 11 10 main.o \ 12 11 misc.o \ 13 12 objects.o \ 13 + read_collect.o \ 14 + read_pgpriv2.o \ 15 + read_retry.o \ 14 16 write_collect.o \ 15 17 write_issue.o 16 18
+476 -300
fs/netfs/buffered_read.c
··· 9 9 #include <linux/task_io_accounting_ops.h> 10 10 #include "internal.h" 11 11 12 - /* 13 - * [DEPRECATED] Unlock the folios in a read operation for when the filesystem 14 - * is using PG_private_2 and direct writing to the cache from here rather than 15 - * marking the page for writeback. 16 - * 17 - * Note that we don't touch folio->private in this code. 18 - */ 19 - static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq, 20 - size_t *account) 21 - { 22 - struct netfs_io_subrequest *subreq; 23 - struct folio *folio; 24 - pgoff_t start_page = rreq->start / PAGE_SIZE; 25 - pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; 26 - bool subreq_failed = false; 27 - 28 - XA_STATE(xas, &rreq->mapping->i_pages, start_page); 29 - 30 - /* Walk through the pagecache and the I/O request lists simultaneously. 31 - * We may have a mixture of cached and uncached sections and we only 32 - * really want to write out the uncached sections. This is slightly 33 - * complicated by the possibility that we might have huge pages with a 34 - * mixture inside. 
35 - */ 36 - subreq = list_first_entry(&rreq->subrequests, 37 - struct netfs_io_subrequest, rreq_link); 38 - subreq_failed = (subreq->error < 0); 39 - 40 - trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2); 41 - 42 - rcu_read_lock(); 43 - xas_for_each(&xas, folio, last_page) { 44 - loff_t pg_end; 45 - bool pg_failed = false; 46 - bool folio_started = false; 47 - 48 - if (xas_retry(&xas, folio)) 49 - continue; 50 - 51 - pg_end = folio_pos(folio) + folio_size(folio) - 1; 52 - 53 - for (;;) { 54 - loff_t sreq_end; 55 - 56 - if (!subreq) { 57 - pg_failed = true; 58 - break; 59 - } 60 - 61 - if (!folio_started && 62 - test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) && 63 - fscache_operation_valid(&rreq->cache_resources)) { 64 - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 65 - folio_start_private_2(folio); 66 - folio_started = true; 67 - } 68 - 69 - pg_failed |= subreq_failed; 70 - sreq_end = subreq->start + subreq->len - 1; 71 - if (pg_end < sreq_end) 72 - break; 73 - 74 - *account += subreq->transferred; 75 - if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 76 - subreq = list_next_entry(subreq, rreq_link); 77 - subreq_failed = (subreq->error < 0); 78 - } else { 79 - subreq = NULL; 80 - subreq_failed = false; 81 - } 82 - 83 - if (pg_end == sreq_end) 84 - break; 85 - } 86 - 87 - if (!pg_failed) { 88 - flush_dcache_folio(folio); 89 - folio_mark_uptodate(folio); 90 - } 91 - 92 - if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { 93 - if (folio->index == rreq->no_unlock_folio && 94 - test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) 95 - _debug("no unlock"); 96 - else 97 - folio_unlock(folio); 98 - } 99 - } 100 - rcu_read_unlock(); 101 - } 102 - 103 - /* 104 - * Unlock the folios in a read operation. We need to set PG_writeback on any 105 - * folios we're going to write back before we unlock them. 
106 - * 107 - * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use 108 - * PG_private_2 and do a direct write to the cache from here instead. 109 - */ 110 - void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) 111 - { 112 - struct netfs_io_subrequest *subreq; 113 - struct netfs_folio *finfo; 114 - struct folio *folio; 115 - pgoff_t start_page = rreq->start / PAGE_SIZE; 116 - pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; 117 - size_t account = 0; 118 - bool subreq_failed = false; 119 - 120 - XA_STATE(xas, &rreq->mapping->i_pages, start_page); 121 - 122 - if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { 123 - __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags); 124 - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 125 - __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 126 - } 127 - } 128 - 129 - /* Handle deprecated PG_private_2 case. */ 130 - if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { 131 - netfs_rreq_unlock_folios_pgpriv2(rreq, &account); 132 - goto out; 133 - } 134 - 135 - /* Walk through the pagecache and the I/O request lists simultaneously. 136 - * We may have a mixture of cached and uncached sections and we only 137 - * really want to write out the uncached sections. This is slightly 138 - * complicated by the possibility that we might have huge pages with a 139 - * mixture inside. 
140 - */ 141 - subreq = list_first_entry(&rreq->subrequests, 142 - struct netfs_io_subrequest, rreq_link); 143 - subreq_failed = (subreq->error < 0); 144 - 145 - trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); 146 - 147 - rcu_read_lock(); 148 - xas_for_each(&xas, folio, last_page) { 149 - loff_t pg_end; 150 - bool pg_failed = false; 151 - bool wback_to_cache = false; 152 - 153 - if (xas_retry(&xas, folio)) 154 - continue; 155 - 156 - pg_end = folio_pos(folio) + folio_size(folio) - 1; 157 - 158 - for (;;) { 159 - loff_t sreq_end; 160 - 161 - if (!subreq) { 162 - pg_failed = true; 163 - break; 164 - } 165 - 166 - wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 167 - pg_failed |= subreq_failed; 168 - sreq_end = subreq->start + subreq->len - 1; 169 - if (pg_end < sreq_end) 170 - break; 171 - 172 - account += subreq->transferred; 173 - if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 174 - subreq = list_next_entry(subreq, rreq_link); 175 - subreq_failed = (subreq->error < 0); 176 - } else { 177 - subreq = NULL; 178 - subreq_failed = false; 179 - } 180 - 181 - if (pg_end == sreq_end) 182 - break; 183 - } 184 - 185 - if (!pg_failed) { 186 - flush_dcache_folio(folio); 187 - finfo = netfs_folio_info(folio); 188 - if (finfo) { 189 - trace_netfs_folio(folio, netfs_folio_trace_filled_gaps); 190 - if (finfo->netfs_group) 191 - folio_change_private(folio, finfo->netfs_group); 192 - else 193 - folio_detach_private(folio); 194 - kfree(finfo); 195 - } 196 - folio_mark_uptodate(folio); 197 - if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) { 198 - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 199 - folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE); 200 - filemap_dirty_folio(folio->mapping, folio); 201 - } 202 - } 203 - 204 - if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { 205 - if (folio->index == rreq->no_unlock_folio && 206 - test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) 207 - _debug("no 
unlock"); 208 - else 209 - folio_unlock(folio); 210 - } 211 - } 212 - rcu_read_unlock(); 213 - 214 - out: 215 - task_io_account_read(account); 216 - if (rreq->netfs_ops->done) 217 - rreq->netfs_ops->done(rreq); 218 - } 219 - 220 12 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, 221 13 unsigned long long *_start, 222 14 unsigned long long *_len, ··· 63 271 return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx)); 64 272 } 65 273 274 + /* 275 + * Decant the list of folios to read into a rolling buffer. 276 + */ 277 + static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq, 278 + struct folio_queue *folioq) 279 + { 280 + unsigned int order, nr; 281 + size_t size = 0; 282 + 283 + nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios, 284 + ARRAY_SIZE(folioq->vec.folios)); 285 + folioq->vec.nr = nr; 286 + for (int i = 0; i < nr; i++) { 287 + struct folio *folio = folioq_folio(folioq, i); 288 + 289 + trace_netfs_folio(folio, netfs_folio_trace_read); 290 + order = folio_order(folio); 291 + folioq->orders[i] = order; 292 + size += PAGE_SIZE << order; 293 + } 294 + 295 + for (int i = nr; i < folioq_nr_slots(folioq); i++) 296 + folioq_clear(folioq, i); 297 + 298 + return size; 299 + } 300 + 301 + /* 302 + * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O 303 + * @subreq: The subrequest to be set up 304 + * 305 + * Prepare the I/O iterator representing the read buffer on a subrequest for 306 + * the filesystem to use for I/O (it can be passed directly to a socket). This 307 + * is intended to be called from the ->issue_read() method once the filesystem 308 + * has trimmed the request to the size it wants. 309 + * 310 + * Returns the limited size if successful and -ENOMEM if insufficient memory 311 + * available. 312 + * 313 + * [!] NOTE: This must be run in the same thread as ->issue_read() was called 314 + * in as we access the readahead_control struct. 
315 + */ 316 + static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) 317 + { 318 + struct netfs_io_request *rreq = subreq->rreq; 319 + size_t rsize = subreq->len; 320 + 321 + if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER) 322 + rsize = umin(rsize, rreq->io_streams[0].sreq_max_len); 323 + 324 + if (rreq->ractl) { 325 + /* If we don't have sufficient folios in the rolling buffer, 326 + * extract a folioq's worth from the readahead region at a time 327 + * into the buffer. Note that this acquires a ref on each page 328 + * that we will need to release later - but we don't want to do 329 + * that until after we've started the I/O. 330 + */ 331 + while (rreq->submitted < subreq->start + rsize) { 332 + struct folio_queue *tail = rreq->buffer_tail, *new; 333 + size_t added; 334 + 335 + new = kmalloc(sizeof(*new), GFP_NOFS); 336 + if (!new) 337 + return -ENOMEM; 338 + netfs_stat(&netfs_n_folioq); 339 + folioq_init(new); 340 + new->prev = tail; 341 + tail->next = new; 342 + rreq->buffer_tail = new; 343 + added = netfs_load_buffer_from_ra(rreq, new); 344 + rreq->iter.count += added; 345 + rreq->submitted += added; 346 + } 347 + } 348 + 349 + subreq->len = rsize; 350 + if (unlikely(rreq->io_streams[0].sreq_max_segs)) { 351 + size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize, 352 + rreq->io_streams[0].sreq_max_segs); 353 + 354 + if (limit < rsize) { 355 + subreq->len = limit; 356 + trace_netfs_sreq(subreq, netfs_sreq_trace_limited); 357 + } 358 + } 359 + 360 + subreq->io_iter = rreq->iter; 361 + 362 + if (iov_iter_is_folioq(&subreq->io_iter)) { 363 + if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) { 364 + subreq->io_iter.folioq = subreq->io_iter.folioq->next; 365 + subreq->io_iter.folioq_slot = 0; 366 + } 367 + subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq; 368 + subreq->curr_folioq_slot = subreq->io_iter.folioq_slot; 369 + subreq->curr_folio_order = 
subreq->curr_folioq->orders[subreq->curr_folioq_slot]; 370 + } 371 + 372 + iov_iter_truncate(&subreq->io_iter, subreq->len); 373 + iov_iter_advance(&rreq->iter, subreq->len); 374 + return subreq->len; 375 + } 376 + 377 + static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq, 378 + struct netfs_io_subrequest *subreq, 379 + loff_t i_size) 380 + { 381 + struct netfs_cache_resources *cres = &rreq->cache_resources; 382 + 383 + if (!cres->ops) 384 + return NETFS_DOWNLOAD_FROM_SERVER; 385 + return cres->ops->prepare_read(subreq, i_size); 386 + } 387 + 388 + static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, 389 + bool was_async) 390 + { 391 + struct netfs_io_subrequest *subreq = priv; 392 + 393 + if (transferred_or_error < 0) { 394 + netfs_read_subreq_terminated(subreq, transferred_or_error, was_async); 395 + return; 396 + } 397 + 398 + if (transferred_or_error > 0) 399 + subreq->transferred += transferred_or_error; 400 + netfs_read_subreq_terminated(subreq, 0, was_async); 401 + } 402 + 403 + /* 404 + * Issue a read against the cache. 405 + * - Eats the caller's ref on subreq. 406 + */ 407 + static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq, 408 + struct netfs_io_subrequest *subreq) 409 + { 410 + struct netfs_cache_resources *cres = &rreq->cache_resources; 411 + 412 + netfs_stat(&netfs_n_rh_read); 413 + cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE, 414 + netfs_cache_read_terminated, subreq); 415 + } 416 + 417 + /* 418 + * Perform a read to the pagecache from a series of sources of different types, 419 + * slicing up the region to be read according to available cache blocks and 420 + * network rsize. 
421 + */ 422 + static void netfs_read_to_pagecache(struct netfs_io_request *rreq) 423 + { 424 + struct netfs_inode *ictx = netfs_inode(rreq->inode); 425 + unsigned long long start = rreq->start; 426 + ssize_t size = rreq->len; 427 + int ret = 0; 428 + 429 + atomic_inc(&rreq->nr_outstanding); 430 + 431 + do { 432 + struct netfs_io_subrequest *subreq; 433 + enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER; 434 + ssize_t slice; 435 + 436 + subreq = netfs_alloc_subrequest(rreq); 437 + if (!subreq) { 438 + ret = -ENOMEM; 439 + break; 440 + } 441 + 442 + subreq->start = start; 443 + subreq->len = size; 444 + 445 + atomic_inc(&rreq->nr_outstanding); 446 + spin_lock_bh(&rreq->lock); 447 + list_add_tail(&subreq->rreq_link, &rreq->subrequests); 448 + subreq->prev_donated = rreq->prev_donated; 449 + rreq->prev_donated = 0; 450 + trace_netfs_sreq(subreq, netfs_sreq_trace_added); 451 + spin_unlock_bh(&rreq->lock); 452 + 453 + source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size); 454 + subreq->source = source; 455 + if (source == NETFS_DOWNLOAD_FROM_SERVER) { 456 + unsigned long long zp = umin(ictx->zero_point, rreq->i_size); 457 + size_t len = subreq->len; 458 + 459 + if (subreq->start >= zp) { 460 + subreq->source = source = NETFS_FILL_WITH_ZEROES; 461 + goto fill_with_zeroes; 462 + } 463 + 464 + if (len > zp - subreq->start) 465 + len = zp - subreq->start; 466 + if (len == 0) { 467 + pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx", 468 + rreq->debug_id, subreq->debug_index, 469 + subreq->len, size, 470 + subreq->start, ictx->zero_point, rreq->i_size); 471 + break; 472 + } 473 + subreq->len = len; 474 + 475 + netfs_stat(&netfs_n_rh_download); 476 + if (rreq->netfs_ops->prepare_read) { 477 + ret = rreq->netfs_ops->prepare_read(subreq); 478 + if (ret < 0) { 479 + atomic_dec(&rreq->nr_outstanding); 480 + netfs_put_subrequest(subreq, false, 481 + netfs_sreq_trace_put_cancel); 482 + break; 483 + } 484 + trace_netfs_sreq(subreq, 
netfs_sreq_trace_prepare); 485 + } 486 + 487 + slice = netfs_prepare_read_iterator(subreq); 488 + if (slice < 0) { 489 + atomic_dec(&rreq->nr_outstanding); 490 + netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); 491 + ret = slice; 492 + break; 493 + } 494 + 495 + rreq->netfs_ops->issue_read(subreq); 496 + goto done; 497 + } 498 + 499 + fill_with_zeroes: 500 + if (source == NETFS_FILL_WITH_ZEROES) { 501 + subreq->source = NETFS_FILL_WITH_ZEROES; 502 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 503 + netfs_stat(&netfs_n_rh_zero); 504 + slice = netfs_prepare_read_iterator(subreq); 505 + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 506 + netfs_read_subreq_terminated(subreq, 0, false); 507 + goto done; 508 + } 509 + 510 + if (source == NETFS_READ_FROM_CACHE) { 511 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 512 + slice = netfs_prepare_read_iterator(subreq); 513 + netfs_read_cache_to_pagecache(rreq, subreq); 514 + goto done; 515 + } 516 + 517 + pr_err("Unexpected read source %u\n", source); 518 + WARN_ON_ONCE(1); 519 + break; 520 + 521 + done: 522 + size -= slice; 523 + start += slice; 524 + cond_resched(); 525 + } while (size > 0); 526 + 527 + if (atomic_dec_and_test(&rreq->nr_outstanding)) 528 + netfs_rreq_terminated(rreq, false); 529 + 530 + /* Defer error return as we may need to wait for outstanding I/O. */ 531 + cmpxchg(&rreq->error, 0, ret); 532 + } 533 + 534 + /* 535 + * Wait for the read operation to complete, successfully or otherwise. 
536 + */ 537 + static int netfs_wait_for_read(struct netfs_io_request *rreq) 538 + { 539 + int ret; 540 + 541 + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); 542 + wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); 543 + ret = rreq->error; 544 + if (ret == 0 && rreq->submitted < rreq->len) { 545 + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); 546 + ret = -EIO; 547 + } 548 + 549 + return ret; 550 + } 551 + 552 + /* 553 + * Set up the initial folioq of buffer folios in the rolling buffer and set the 554 + * iterator to refer to it. 555 + */ 556 + static int netfs_prime_buffer(struct netfs_io_request *rreq) 557 + { 558 + struct folio_queue *folioq; 559 + size_t added; 560 + 561 + folioq = kmalloc(sizeof(*folioq), GFP_KERNEL); 562 + if (!folioq) 563 + return -ENOMEM; 564 + netfs_stat(&netfs_n_folioq); 565 + folioq_init(folioq); 566 + rreq->buffer = folioq; 567 + rreq->buffer_tail = folioq; 568 + rreq->submitted = rreq->start; 569 + iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0); 570 + 571 + added = netfs_load_buffer_from_ra(rreq, folioq); 572 + rreq->iter.count += added; 573 + rreq->submitted += added; 574 + return 0; 575 + } 576 + 577 + /* 578 + * Drop the ref on each folio that we inherited from the VM readahead code. We 579 + * still have the folio locks to pin the page until we complete the I/O. 580 + * 581 + * Note that we can't just release the batch in each queue struct as we use the 582 + * occupancy count in other places. 
583 + */ 584 + static void netfs_put_ra_refs(struct folio_queue *folioq) 585 + { 586 + struct folio_batch fbatch; 587 + 588 + folio_batch_init(&fbatch); 589 + while (folioq) { 590 + for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) { 591 + struct folio *folio = folioq_folio(folioq, slot); 592 + if (!folio) 593 + continue; 594 + trace_netfs_folio(folio, netfs_folio_trace_read_put); 595 + if (!folio_batch_add(&fbatch, folio)) 596 + folio_batch_release(&fbatch); 597 + } 598 + folioq = folioq->next; 599 + } 600 + 601 + folio_batch_release(&fbatch); 602 + } 603 + 66 604 /** 67 605 * netfs_readahead - Helper to manage a read request 68 606 * @ractl: The description of the readahead request ··· 411 289 void netfs_readahead(struct readahead_control *ractl) 412 290 { 413 291 struct netfs_io_request *rreq; 414 - struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); 292 + struct netfs_inode *ictx = netfs_inode(ractl->mapping->host); 293 + unsigned long long start = readahead_pos(ractl); 294 + size_t size = readahead_length(ractl); 415 295 int ret; 416 296 417 - _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); 418 - 419 - if (readahead_count(ractl) == 0) 420 - return; 421 - 422 - rreq = netfs_alloc_request(ractl->mapping, ractl->file, 423 - readahead_pos(ractl), 424 - readahead_length(ractl), 297 + rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size, 425 298 NETFS_READAHEAD); 426 299 if (IS_ERR(rreq)) 427 300 return; 428 301 429 - ret = netfs_begin_cache_read(rreq, ctx); 302 + ret = netfs_begin_cache_read(rreq, ictx); 430 303 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 431 304 goto cleanup_free; 432 305 ··· 431 314 432 315 netfs_rreq_expand(rreq, ractl); 433 316 434 - /* Set up the output buffer */ 435 - iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages, 436 - rreq->start, rreq->len); 317 + rreq->ractl = ractl; 318 + if (netfs_prime_buffer(rreq) < 0) 319 + goto cleanup_free; 320 + 
netfs_read_to_pagecache(rreq); 437 321 438 - /* Drop the refs on the folios here rather than in the cache or 439 - * filesystem. The locks will be dropped in netfs_rreq_unlock(). 440 - */ 441 - while (readahead_folio(ractl)) 442 - ; 322 + /* Release the folio refs whilst we're waiting for the I/O. */ 323 + netfs_put_ra_refs(rreq->buffer); 443 324 444 - netfs_begin_read(rreq, false); 445 - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 325 + netfs_put_request(rreq, true, netfs_rreq_trace_put_return); 446 326 return; 447 327 448 328 cleanup_free: ··· 447 333 return; 448 334 } 449 335 EXPORT_SYMBOL(netfs_readahead); 336 + 337 + /* 338 + * Create a rolling buffer with a single occupying folio. 339 + */ 340 + static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio) 341 + { 342 + struct folio_queue *folioq; 343 + 344 + folioq = kmalloc(sizeof(*folioq), GFP_KERNEL); 345 + if (!folioq) 346 + return -ENOMEM; 347 + 348 + netfs_stat(&netfs_n_folioq); 349 + folioq_init(folioq); 350 + folioq_append(folioq, folio); 351 + BUG_ON(folioq_folio(folioq, 0) != folio); 352 + BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio)); 353 + rreq->buffer = folioq; 354 + rreq->buffer_tail = folioq; 355 + rreq->submitted = rreq->start + rreq->len; 356 + iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len); 357 + rreq->ractl = (struct readahead_control *)1UL; 358 + return 0; 359 + } 360 + 361 + /* 362 + * Read into gaps in a folio partially filled by a streaming write. 
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2; /* Worst case: head + per-page sinks + tail */
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		/* Gap before the dirty region: read into the folio head. */
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		/* The streaming-written middle must not be overwritten:
		 * direct it into the sink folio a page at a time.
		 */
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		/* Gap after the dirty region: read into the folio tail. */
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret == 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 */
/* NOTE(review): the netfs_read_folio() hunks below are partial in this view
 * (hunk elisions in the source); only the visible post-patch lines follow.
 */
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		/* A partially-written (streaming-write) folio: fill only the
		 * gaps around the dirty data.
		 */
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	/* ... hunk elided in source view ... */

	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
611 - */ 612 - struct netfs_folio *finfo = netfs_folio_info(folio); 613 - struct bio_vec *bvec; 614 - unsigned int from = finfo->dirty_offset; 615 - unsigned int to = from + finfo->dirty_len; 616 - unsigned int off = 0, i = 0; 617 - size_t flen = folio_size(folio); 618 - size_t nr_bvec = flen / PAGE_SIZE + 2; 619 - size_t part; 377 + ret = netfs_create_singular_buffer(rreq, folio); 378 + if (ret < 0) 379 + goto discard; 620 380 621 - ret = -ENOMEM; 622 - bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL); 623 - if (!bvec) 624 - goto discard; 625 - 626 - sink = folio_alloc(GFP_KERNEL, 0); 627 - if (!sink) 628 - goto discard; 629 - 630 - trace_netfs_folio(folio, netfs_folio_trace_read_gaps); 631 - 632 - rreq->direct_bv = bvec; 633 - rreq->direct_bv_count = nr_bvec; 634 - if (from > 0) { 635 - bvec_set_folio(&bvec[i++], folio, from, 0); 636 - off = from; 637 - } 638 - while (off < to) { 639 - part = min_t(size_t, to - off, PAGE_SIZE); 640 - bvec_set_folio(&bvec[i++], sink, part, 0); 641 - off += part; 642 - } 643 - if (to < flen) 644 - bvec_set_folio(&bvec[i++], folio, flen - to, to); 645 - iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len); 646 - } else { 647 - iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, 648 - rreq->start, rreq->len); 649 - } 650 - 651 - ret = netfs_begin_read(rreq, true); 652 - if (sink) 653 - folio_put(sink); 381 + netfs_read_to_pagecache(rreq); 382 + ret = netfs_wait_for_read(rreq); 654 383 netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 655 384 return ret < 0 ? ret : 0; 656 385 ··· 681 494 * 682 495 * Pre-read data for a write-begin request by drawing data from the cache if 683 496 * possible, or the netfs if not. Space beyond the EOF is zero-filled. 684 - * Multiple I/O requests from different sources will get munged together. 
If 685 - * necessary, the readahead window can be expanded in either direction to a 686 - * more convenient alighment for RPC efficiency or to make storage in the cache 687 - * feasible. 497 + * Multiple I/O requests from different sources will get munged together. 688 498 * 689 499 * The calling netfs must provide a table of operations, only one of which, 690 - * issue_op, is mandatory. 500 + * issue_read, is mandatory. 691 501 * 692 502 * The check_write_begin() operation can be provided to check for and flush 693 503 * conflicting writes once the folio is grabbed and locked. It is passed a ··· 711 527 struct folio *folio; 712 528 pgoff_t index = pos >> PAGE_SHIFT; 713 529 int ret; 714 - 715 - DEFINE_READAHEAD(ractl, file, NULL, mapping, index); 716 530 717 531 retry: 718 532 folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, ··· 759 577 netfs_stat(&netfs_n_rh_write_begin); 760 578 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); 761 579 762 - /* Expand the request to meet caching requirements and download 763 - * preferences. 
764 - */ 765 - ractl._nr_pages = folio_nr_pages(folio); 766 - netfs_rreq_expand(rreq, &ractl); 767 - 768 580 /* Set up the output buffer */ 769 - iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, 770 - rreq->start, rreq->len); 581 + ret = netfs_create_singular_buffer(rreq, folio); 582 + if (ret < 0) 583 + goto error_put; 771 584 772 - /* We hold the folio locks, so we can drop the references */ 773 - folio_get(folio); 774 - while (readahead_folio(&ractl)) 775 - ; 776 - 777 - ret = netfs_begin_read(rreq, true); 585 + netfs_read_to_pagecache(rreq); 586 + ret = netfs_wait_for_read(rreq); 778 587 if (ret < 0) 779 588 goto error; 780 589 netfs_put_request(rreq, false, netfs_rreq_trace_put_return); ··· 825 652 trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write); 826 653 827 654 /* Set up the output buffer */ 828 - iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, 829 - rreq->start, rreq->len); 655 + ret = netfs_create_singular_buffer(rreq, folio); 656 + if (ret < 0) 657 + goto error_put; 830 658 831 - ret = netfs_begin_read(rreq, true); 659 + folioq_mark2(rreq->buffer, 0); 660 + netfs_read_to_pagecache(rreq); 661 + ret = netfs_wait_for_read(rreq); 832 662 netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 833 663 return ret; 834 664
+141 -6
fs/netfs/direct_read.c
#include <linux/netfs.h>
#include "internal.h"

/*
 * Prepare the iterator for the next subrequest of a direct read: clamp the
 * subrequest length to the transport's maximum length (and, if set, maximum
 * segment count), then carve the corresponding span off the request's
 * destination iterator.
 */
static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize;

	rsize = umin(subreq->len, rreq->io_streams[0].sreq_max_len);
	subreq->len = rsize;

	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		/* The transport also limits how many segments one op may
		 * carry; shrink the subrequest to fit if necessary.
		 */
		size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

	/* Give the subrequest its own view of the buffer and advance the main
	 * iterator past the span it now owns.
	 */
	subreq->io_iter = rreq->iter;
	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->iter, subreq->len);
}

/*
 * Perform a read to a buffer from the server, slicing up the region to be read
 * according to the network rsize.
 */
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	/* Hold an extra outstanding count so the request can't complete
	 * whilst we're still issuing subrequests.
	 */
	atomic_set(&rreq->nr_outstanding, 1);

	do {
		struct netfs_io_subrequest *subreq;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
		subreq->start = start;
		subreq->len = size;

		atomic_inc(&rreq->nr_outstanding);
		spin_lock_bh(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
		/* Inherit any deferred donation left over from a retry. */
		subreq->prev_donated = rreq->prev_donated;
		rreq->prev_donated = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock_bh(&rreq->lock);

		netfs_stat(&netfs_n_rh_download);
		if (rreq->netfs_ops->prepare_read) {
			ret = rreq->netfs_ops->prepare_read(subreq);
			if (ret < 0) {
				atomic_dec(&rreq->nr_outstanding);
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
				break;
			}
		}

		netfs_prepare_dio_read_iterator(subreq);
		slice = subreq->len;
		rreq->netfs_ops->issue_read(subreq);

		size -= slice;
		start += slice;
		rreq->submitted += slice;

		/* Stop issuing if a nonblocking request would have to block. */
		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
			break;
		cond_resched();
	} while (size > 0);

	/* Drop the issuing count; run collection if all I/O already finished. */
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, false);
	return ret;
}

/*
 * Perform a read to an application buffer, bypassing the pagecache and the
 * local disk cache.
 */
static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
{
	int ret;

	_enter("R=%x %llx-%llx",
	       rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);

	if (rreq->len == 0) {
		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
		return -EIO;
	}

	// TODO: Use bounce buffer if requested

	inode_dio_begin(rreq->inode);

	ret = netfs_dispatch_unbuffered_reads(rreq);

	if (!rreq->submitted) {
		/* Nothing was issued, so there's no outstanding I/O to wait
		 * for; drop the issuing ref and undo the dio accounting.
		 */
		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
		inode_dio_end(rreq->inode);
		ret = 0;
		goto out;
	}

	if (sync) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
		wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);

		ret = rreq->error;
		if (ret == 0 && rreq->submitted < rreq->len &&
		    rreq->origin != NETFS_DIO_READ) {
			/* Short submission without error - treat as EIO. */
			trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
			ret = -EIO;
		}
	} else {
		/* Async: completion will be signalled via ->iocb. */
		ret = -EIOCBQUEUED;
	}

out:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
 * @iocb: The I/O
control descriptor describing the read ··· 168 31 struct netfs_io_request *rreq; 169 32 ssize_t ret; 170 33 size_t orig_count = iov_iter_count(iter); 171 - bool async = !is_sync_kiocb(iocb); 34 + bool sync = is_sync_kiocb(iocb); 172 35 173 36 _enter(""); 174 37 ··· 215 78 216 79 // TODO: Set up bounce buffer if needed 217 80 218 - if (async) 81 + if (!sync) 219 82 rreq->iocb = iocb; 220 83 221 - ret = netfs_begin_read(rreq, is_sync_kiocb(iocb)); 84 + ret = netfs_unbuffered_read(rreq, sync); 222 85 if (ret < 0) 223 86 goto out; /* May be -EIOCBQUEUED */ 224 - if (!async) { 87 + if (sync) { 225 88 // TODO: Copy from bounce buffer 226 89 iocb->ki_pos += rreq->transferred; 227 90 ret = rreq->transferred; ··· 231 94 netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 232 95 if (ret > 0) 233 96 orig_count -= ret; 234 - if (ret != -EIOCBQUEUED) 235 - iov_iter_revert(iter, orig_count - iov_iter_count(iter)); 236 97 return ret; 237 98 } 238 99 EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
+28 -7
fs/netfs/internal.h
··· 23 23 /* 24 24 * buffered_read.c 25 25 */ 26 - void netfs_rreq_unlock_folios(struct netfs_io_request *rreq); 27 26 int netfs_prefetch_for_write(struct file *file, struct folio *folio, 28 27 size_t offset, size_t len); 29 - 30 - /* 31 - * io.c 32 - */ 33 - void netfs_rreq_work(struct work_struct *work); 34 - int netfs_begin_read(struct netfs_io_request *rreq, bool sync); 35 28 36 29 /* 37 30 * main.c ··· 84 91 } 85 92 86 93 /* 94 + * read_collect.c 95 + */ 96 + void netfs_read_termination_worker(struct work_struct *work); 97 + void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async); 98 + 99 + /* 100 + * read_pgpriv2.c 101 + */ 102 + void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq, 103 + struct netfs_io_request *rreq, 104 + struct folio_queue *folioq, 105 + int slot); 106 + void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq); 107 + bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq); 108 + 109 + /* 110 + * read_retry.c 111 + */ 112 + void netfs_retry_reads(struct netfs_io_request *rreq); 113 + void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq); 114 + 115 + /* 87 116 * stats.c 88 117 */ 89 118 #ifdef CONFIG_NETFS_STATS ··· 132 117 extern atomic_t netfs_n_wh_writethrough; 133 118 extern atomic_t netfs_n_wh_dio_write; 134 119 extern atomic_t netfs_n_wh_writepages; 120 + extern atomic_t netfs_n_wh_copy_to_cache; 135 121 extern atomic_t netfs_n_wh_wstream_conflict; 136 122 extern atomic_t netfs_n_wh_upload; 137 123 extern atomic_t netfs_n_wh_upload_done; ··· 178 162 void netfs_reissue_write(struct netfs_io_stream *stream, 179 163 struct netfs_io_subrequest *subreq, 180 164 struct iov_iter *source); 165 + void netfs_issue_write(struct netfs_io_request *wreq, 166 + struct netfs_io_stream *stream); 167 + int netfs_advance_write(struct netfs_io_request *wreq, 168 + struct netfs_io_stream *stream, 169 + loff_t start, size_t len, bool to_eof); 181 170 struct netfs_io_request 
*netfs_begin_writethrough(struct kiocb *iocb, size_t len); 182 171 int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 183 172 struct folio *folio, size_t copied, bool to_page_end,
+50
fs/netfs/iterator.c
	return min(span, max_size);
}

/*
 * Select the span of a folio queue iterator we're going to use.  Limit it by
 * both maximum size and maximum number of segments.  Returns the size of the
 * span in bytes.
 */
static size_t netfs_limit_folioq(const struct iov_iter *iter, size_t start_offset,
				 size_t max_size, size_t max_segs)
{
	const struct folio_queue *folioq = iter->folioq;
	unsigned int nsegs = 0;
	unsigned int slot = iter->folioq_slot;
	size_t span = 0, n = iter->count;

	if (WARN_ON(!iov_iter_is_folioq(iter)) ||
	    WARN_ON(start_offset > n) ||
	    n == 0)
		return 0;
	max_size = umin(max_size, n - start_offset);

	/* Step past a fully-consumed queue segment. */
	if (slot >= folioq_nr_slots(folioq)) {
		folioq = folioq->next;
		slot = 0;
	}

	start_offset += iter->iov_offset;
	do {
		size_t flen = folioq_folio_size(folioq, slot);

		if (start_offset < flen) {
			/* This folio contributes (part of) itself to the span. */
			span += flen - start_offset;
			nsegs++;
			start_offset = 0;
		} else {
			/* Folio lies wholly before the start offset: skip it. */
			start_offset -= flen;
		}
		if (span >= max_size || nsegs >= max_segs)
			break;

		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = folioq->next;
			slot = 0;
		}
	} while (folioq);

	return umin(span, max_size);
}

size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
			size_t max_size, size_t max_segs)
{
	if (iov_iter_is_folioq(iter))
		return netfs_limit_folioq(iter, start_offset, max_size, max_segs);
	if (iov_iter_is_bvec(iter))
		return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
	if (iov_iter_is_xarray(iter))
+3 -1
fs/netfs/main.c
··· 36 36 static const char *netfs_origins[nr__netfs_io_origin] = { 37 37 [NETFS_READAHEAD] = "RA", 38 38 [NETFS_READPAGE] = "RP", 39 + [NETFS_READ_GAPS] = "RG", 39 40 [NETFS_READ_FOR_WRITE] = "RW", 40 41 [NETFS_DIO_READ] = "DR", 41 42 [NETFS_WRITEBACK] = "WB", 42 43 [NETFS_WRITETHROUGH] = "WT", 43 44 [NETFS_UNBUFFERED_WRITE] = "UW", 44 45 [NETFS_DIO_WRITE] = "DW", 46 + [NETFS_PGPRIV2_COPY_TO_CACHE] = "2C", 45 47 }; 46 48 47 49 /* ··· 63 61 64 62 rreq = list_entry(v, struct netfs_io_request, proc_link); 65 63 seq_printf(m, 66 - "%08x %s %3d %2lx %4d %3d @%04llx %llx/%llx", 64 + "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx", 67 65 rreq->debug_id, 68 66 netfs_origins[rreq->origin], 69 67 refcount_read(&rreq->ref),
+5 -3
fs/netfs/objects.c
··· 36 36 memset(rreq, 0, kmem_cache_size(cache)); 37 37 rreq->start = start; 38 38 rreq->len = len; 39 - rreq->upper_len = len; 40 39 rreq->origin = origin; 41 40 rreq->netfs_ops = ctx->ops; 42 41 rreq->mapping = mapping; ··· 43 44 rreq->i_size = i_size_read(inode); 44 45 rreq->debug_id = atomic_inc_return(&debug_ids); 45 46 rreq->wsize = INT_MAX; 47 + rreq->io_streams[0].sreq_max_len = ULONG_MAX; 48 + rreq->io_streams[0].sreq_max_segs = 0; 46 49 spin_lock_init(&rreq->lock); 47 50 INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); 48 51 INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); ··· 53 52 54 53 if (origin == NETFS_READAHEAD || 55 54 origin == NETFS_READPAGE || 55 + origin == NETFS_READ_GAPS || 56 56 origin == NETFS_READ_FOR_WRITE || 57 57 origin == NETFS_DIO_READ) 58 - INIT_WORK(&rreq->work, netfs_rreq_work); 58 + INIT_WORK(&rreq->work, netfs_read_termination_worker); 59 59 else 60 60 INIT_WORK(&rreq->work, netfs_write_collection_worker); 61 61 ··· 165 163 if (was_async) { 166 164 rreq->work.func = netfs_free_request; 167 165 if (!queue_work(system_unbound_wq, &rreq->work)) 168 - BUG(); 166 + WARN_ON(1); 169 167 } else { 170 168 netfs_free_request(&rreq->work); 171 169 }
+544
fs/netfs/read_collect.c
// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	/* Zero-fill whatever the server didn't supply. */
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
				    struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		/* Any streaming-write gaps have now been filled in: restore
		 * the folio's original private pointer and free the info.
		 */
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			/* Mark dirty so writeback copies the folio to the
			 * cache; only if nothing else owns folio->private.
			 */
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
			netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
	}

	if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
		if (folio->index == rreq->no_unlock_folio &&
		    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
			/* The caller wants to unlock this folio itself. */
			_debug("no unlock");
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
			folio_unlock(folio);
		}
	}
}

/*
 * Unlock any folios that are now completely read.  Returns true if the
 * subrequest is removed from the list.
85 + */ 86 + static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async) 87 + { 88 + struct netfs_io_subrequest *prev, *next; 89 + struct netfs_io_request *rreq = subreq->rreq; 90 + struct folio_queue *folioq = subreq->curr_folioq; 91 + size_t avail, prev_donated, next_donated, fsize, part, excess; 92 + loff_t fpos, start; 93 + loff_t fend; 94 + int slot = subreq->curr_folioq_slot; 95 + 96 + if (WARN(subreq->transferred > subreq->len, 97 + "Subreq overread: R%x[%x] %zu > %zu", 98 + rreq->debug_id, subreq->debug_index, 99 + subreq->transferred, subreq->len)) 100 + subreq->transferred = subreq->len; 101 + 102 + next_folio: 103 + fsize = PAGE_SIZE << subreq->curr_folio_order; 104 + fpos = round_down(subreq->start + subreq->consumed, fsize); 105 + fend = fpos + fsize; 106 + 107 + if (WARN_ON_ONCE(!folioq) || 108 + WARN_ON_ONCE(!folioq_folio(folioq, slot)) || 109 + WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) { 110 + pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n", 111 + rreq->debug_id, subreq->debug_index, 112 + subreq->start, subreq->start + subreq->transferred - 1, 113 + subreq->consumed, subreq->transferred, subreq->len, 114 + slot); 115 + if (folioq) { 116 + struct folio *folio = folioq_folio(folioq, slot); 117 + 118 + pr_err("folioq: orders=%02x%02x%02x%02x\n", 119 + folioq->orders[0], folioq->orders[1], 120 + folioq->orders[2], folioq->orders[3]); 121 + if (folio) 122 + pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n", 123 + fpos, fend - 1, folio_pos(folio), folio_order(folio), 124 + folioq_folio_order(folioq, slot)); 125 + } 126 + } 127 + 128 + donation_changed: 129 + /* Try to consume the current folio if we've hit or passed the end of 130 + * it. There's a possibility that this subreq doesn't start at the 131 + * beginning of the folio, in which case we need to donate to/from the 132 + * preceding subreq. 
133 + * 134 + * We also need to include any potential donation back from the 135 + * following subreq. 136 + */ 137 + prev_donated = READ_ONCE(subreq->prev_donated); 138 + next_donated = READ_ONCE(subreq->next_donated); 139 + if (prev_donated || next_donated) { 140 + spin_lock_bh(&rreq->lock); 141 + prev_donated = subreq->prev_donated; 142 + next_donated = subreq->next_donated; 143 + subreq->start -= prev_donated; 144 + subreq->len += prev_donated; 145 + subreq->transferred += prev_donated; 146 + prev_donated = subreq->prev_donated = 0; 147 + if (subreq->transferred == subreq->len) { 148 + subreq->len += next_donated; 149 + subreq->transferred += next_donated; 150 + next_donated = subreq->next_donated = 0; 151 + } 152 + trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations); 153 + spin_unlock_bh(&rreq->lock); 154 + } 155 + 156 + avail = subreq->transferred; 157 + if (avail == subreq->len) 158 + avail += next_donated; 159 + start = subreq->start; 160 + if (subreq->consumed == 0) { 161 + start -= prev_donated; 162 + avail += prev_donated; 163 + } else { 164 + start += subreq->consumed; 165 + avail -= subreq->consumed; 166 + } 167 + part = umin(avail, fsize); 168 + 169 + trace_netfs_progress(subreq, start, avail, part); 170 + 171 + if (start + avail >= fend) { 172 + if (fpos == start) { 173 + /* Flush, unlock and mark for caching any folio we've just read. */ 174 + subreq->consumed = fend - subreq->start; 175 + netfs_unlock_read_folio(subreq, rreq, folioq, slot); 176 + folioq_mark2(folioq, slot); 177 + if (subreq->consumed >= subreq->len) 178 + goto remove_subreq; 179 + } else if (fpos < start) { 180 + excess = fend - subreq->start; 181 + 182 + spin_lock_bh(&rreq->lock); 183 + /* If we complete first on a folio split with the 184 + * preceding subreq, donate to that subreq - otherwise 185 + * we get the responsibility. 
186 + */ 187 + if (subreq->prev_donated != prev_donated) { 188 + spin_unlock_bh(&rreq->lock); 189 + goto donation_changed; 190 + } 191 + 192 + if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) { 193 + spin_unlock_bh(&rreq->lock); 194 + pr_err("Can't donate prior to front\n"); 195 + goto bad; 196 + } 197 + 198 + prev = list_prev_entry(subreq, rreq_link); 199 + WRITE_ONCE(prev->next_donated, prev->next_donated + excess); 200 + subreq->start += excess; 201 + subreq->len -= excess; 202 + subreq->transferred -= excess; 203 + trace_netfs_donate(rreq, subreq, prev, excess, 204 + netfs_trace_donate_tail_to_prev); 205 + trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev); 206 + 207 + if (subreq->consumed >= subreq->len) 208 + goto remove_subreq_locked; 209 + spin_unlock_bh(&rreq->lock); 210 + } else { 211 + pr_err("fpos > start\n"); 212 + goto bad; 213 + } 214 + 215 + /* Advance the rolling buffer to the next folio. */ 216 + slot++; 217 + if (slot >= folioq_nr_slots(folioq)) { 218 + slot = 0; 219 + folioq = folioq->next; 220 + subreq->curr_folioq = folioq; 221 + } 222 + subreq->curr_folioq_slot = slot; 223 + if (folioq && folioq_folio(folioq, slot)) 224 + subreq->curr_folio_order = folioq->orders[slot]; 225 + if (!was_async) 226 + cond_resched(); 227 + goto next_folio; 228 + } 229 + 230 + /* Deal with partial progress. */ 231 + if (subreq->transferred < subreq->len) 232 + return false; 233 + 234 + /* Donate the remaining downloaded data to one of the neighbouring 235 + * subrequests. Note that we may race with them doing the same thing. 236 + */ 237 + spin_lock_bh(&rreq->lock); 238 + 239 + if (subreq->prev_donated != prev_donated || 240 + subreq->next_donated != next_donated) { 241 + spin_unlock_bh(&rreq->lock); 242 + cond_resched(); 243 + goto donation_changed; 244 + } 245 + 246 + /* Deal with the trickiest case: that this subreq is in the middle of a 247 + * folio, not touching either edge, but finishes first. 
In such a 248 + * case, we donate to the previous subreq, if there is one, so that the 249 + * donation is only handled when that completes - and remove this 250 + * subreq from the list. 251 + * 252 + * If the previous subreq finished first, we will have acquired their 253 + * donation and should be able to unlock folios and/or donate nextwards. 254 + */ 255 + if (!subreq->consumed && 256 + !prev_donated && 257 + !list_is_first(&subreq->rreq_link, &rreq->subrequests)) { 258 + prev = list_prev_entry(subreq, rreq_link); 259 + WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len); 260 + subreq->start += subreq->len; 261 + subreq->len = 0; 262 + subreq->transferred = 0; 263 + trace_netfs_donate(rreq, subreq, prev, subreq->len, 264 + netfs_trace_donate_to_prev); 265 + trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev); 266 + goto remove_subreq_locked; 267 + } 268 + 269 + /* If we can't donate down the chain, donate up the chain instead. */ 270 + excess = subreq->len - subreq->consumed + next_donated; 271 + 272 + if (!subreq->consumed) 273 + excess += prev_donated; 274 + 275 + if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 276 + rreq->prev_donated = excess; 277 + trace_netfs_donate(rreq, subreq, NULL, excess, 278 + netfs_trace_donate_to_deferred_next); 279 + } else { 280 + next = list_next_entry(subreq, rreq_link); 281 + WRITE_ONCE(next->prev_donated, excess); 282 + trace_netfs_donate(rreq, subreq, next, excess, 283 + netfs_trace_donate_to_next); 284 + } 285 + trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next); 286 + subreq->len = subreq->consumed; 287 + subreq->transferred = subreq->consumed; 288 + goto remove_subreq_locked; 289 + 290 + remove_subreq: 291 + spin_lock_bh(&rreq->lock); 292 + remove_subreq_locked: 293 + subreq->consumed = subreq->len; 294 + list_del(&subreq->rreq_link); 295 + spin_unlock_bh(&rreq->lock); 296 + netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed); 297 + return true; 298 + 299 + bad: 300 + 
/* Errr... prev and next both donated to us, but insufficient to finish 301 + * the folio. 302 + */ 303 + printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n", 304 + rreq->debug_id, subreq->debug_index, 305 + subreq->start, subreq->start + subreq->transferred - 1, 306 + subreq->consumed, subreq->transferred, subreq->len); 307 + printk("folio: %llx-%llx\n", fpos, fend - 1); 308 + printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated); 309 + printk("s=%llx av=%zx part=%zx\n", start, avail, part); 310 + BUG(); 311 + } 312 + 313 + /* 314 + * Do page flushing and suchlike after DIO. 315 + */ 316 + static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) 317 + { 318 + struct netfs_io_subrequest *subreq; 319 + unsigned int i; 320 + 321 + /* Collect unbuffered reads and direct reads, adding up the transfer 322 + * sizes until we find the first short or failed subrequest. 323 + */ 324 + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 325 + rreq->transferred += subreq->transferred; 326 + 327 + if (subreq->transferred < subreq->len || 328 + test_bit(NETFS_SREQ_FAILED, &subreq->flags)) { 329 + rreq->error = subreq->error; 330 + break; 331 + } 332 + } 333 + 334 + if (rreq->origin == NETFS_DIO_READ) { 335 + for (i = 0; i < rreq->direct_bv_count; i++) { 336 + flush_dcache_page(rreq->direct_bv[i].bv_page); 337 + // TODO: cifs marks pages in the destination buffer 338 + // dirty under some circumstances after a read. Do we 339 + // need to do that too? 340 + set_page_dirty(rreq->direct_bv[i].bv_page); 341 + } 342 + } 343 + 344 + if (rreq->iocb) { 345 + rreq->iocb->ki_pos += rreq->transferred; 346 + if (rreq->iocb->ki_complete) 347 + rreq->iocb->ki_complete( 348 + rreq->iocb, rreq->error ? 
rreq->error : rreq->transferred); 349 + } 350 + if (rreq->netfs_ops->done) 351 + rreq->netfs_ops->done(rreq); 352 + if (rreq->origin == NETFS_DIO_READ) 353 + inode_dio_end(rreq->inode); 354 + } 355 + 356 + /* 357 + * Assess the state of a read request and decide what to do next. 358 + * 359 + * Note that we're in normal kernel thread context at this point, possibly 360 + * running on a workqueue. 361 + */ 362 + static void netfs_rreq_assess(struct netfs_io_request *rreq) 363 + { 364 + trace_netfs_rreq(rreq, netfs_rreq_trace_assess); 365 + 366 + //netfs_rreq_is_still_valid(rreq); 367 + 368 + if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) { 369 + netfs_retry_reads(rreq); 370 + return; 371 + } 372 + 373 + if (rreq->origin == NETFS_DIO_READ || 374 + rreq->origin == NETFS_READ_GAPS) 375 + netfs_rreq_assess_dio(rreq); 376 + task_io_account_read(rreq->transferred); 377 + 378 + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); 379 + clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); 380 + wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); 381 + 382 + trace_netfs_rreq(rreq, netfs_rreq_trace_done); 383 + netfs_clear_subrequests(rreq, false); 384 + netfs_unlock_abandoned_read_pages(rreq); 385 + if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags))) 386 + netfs_pgpriv2_write_to_the_cache(rreq); 387 + } 388 + 389 + void netfs_read_termination_worker(struct work_struct *work) 390 + { 391 + struct netfs_io_request *rreq = 392 + container_of(work, struct netfs_io_request, work); 393 + netfs_see_request(rreq, netfs_rreq_trace_see_work); 394 + netfs_rreq_assess(rreq); 395 + netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete); 396 + } 397 + 398 + /* 399 + * Handle the completion of all outstanding I/O operations on a read request. 400 + * We inherit a ref from the caller. 
401 + */ 402 + void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async) 403 + { 404 + if (!was_async) 405 + return netfs_rreq_assess(rreq); 406 + if (!work_pending(&rreq->work)) { 407 + netfs_get_request(rreq, netfs_rreq_trace_get_work); 408 + if (!queue_work(system_unbound_wq, &rreq->work)) 409 + netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq); 410 + } 411 + } 412 + 413 + /** 414 + * netfs_read_subreq_progress - Note progress of a read operation. 415 + * @subreq: The read request that has terminated. 416 + * @was_async: True if we're in an asynchronous context. 417 + * 418 + * This tells the read side of netfs lib that a contributory I/O operation has 419 + * made some progress and that it may be possible to unlock some folios. 420 + * 421 + * Before calling, the filesystem should update subreq->transferred to track 422 + * the amount of data copied into the output buffer. 423 + * 424 + * If @was_async is true, the caller might be running in softirq or interrupt 425 + * context and we can't sleep. 426 + */ 427 + void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, 428 + bool was_async) 429 + { 430 + struct netfs_io_request *rreq = subreq->rreq; 431 + 432 + trace_netfs_sreq(subreq, netfs_sreq_trace_progress); 433 + 434 + if (subreq->transferred > subreq->consumed && 435 + (rreq->origin == NETFS_READAHEAD || 436 + rreq->origin == NETFS_READPAGE || 437 + rreq->origin == NETFS_READ_FOR_WRITE)) { 438 + netfs_consume_read_data(subreq, was_async); 439 + __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 440 + } 441 + } 442 + EXPORT_SYMBOL(netfs_read_subreq_progress); 443 + 444 + /** 445 + * netfs_read_subreq_terminated - Note the termination of an I/O operation. 446 + * @subreq: The I/O request that has terminated. 447 + * @error: Error code indicating type of completion. 
448 + * @was_async: The termination was asynchronous 449 + * 450 + * This tells the read helper that a contributory I/O operation has terminated, 451 + * one way or another, and that it should integrate the results. 452 + * 453 + * The caller indicates the outcome of the operation through @error, supplying 454 + * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY 455 + * is set) or a negative error code. The helper will look after reissuing I/O 456 + * operations as appropriate and writing downloaded data to the cache. 457 + * 458 + * Before calling, the filesystem should update subreq->transferred to track 459 + * the amount of data copied into the output buffer. 460 + * 461 + * If @was_async is true, the caller might be running in softirq or interrupt 462 + * context and we can't sleep. 463 + */ 464 + void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, 465 + int error, bool was_async) 466 + { 467 + struct netfs_io_request *rreq = subreq->rreq; 468 + 469 + switch (subreq->source) { 470 + case NETFS_READ_FROM_CACHE: 471 + netfs_stat(&netfs_n_rh_read_done); 472 + break; 473 + case NETFS_DOWNLOAD_FROM_SERVER: 474 + netfs_stat(&netfs_n_rh_download_done); 475 + break; 476 + default: 477 + break; 478 + } 479 + 480 + if (rreq->origin != NETFS_DIO_READ) { 481 + /* Collect buffered reads. 482 + * 483 + * If the read completed validly short, then we can clear the 484 + * tail before going on to unlock the folios. 
485 + */ 486 + if (error == 0 && subreq->transferred < subreq->len && 487 + (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) || 488 + test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) { 489 + netfs_clear_unread(subreq); 490 + subreq->transferred = subreq->len; 491 + trace_netfs_sreq(subreq, netfs_sreq_trace_clear); 492 + } 493 + if (subreq->transferred > subreq->consumed && 494 + (rreq->origin == NETFS_READAHEAD || 495 + rreq->origin == NETFS_READPAGE || 496 + rreq->origin == NETFS_READ_FOR_WRITE)) { 497 + netfs_consume_read_data(subreq, was_async); 498 + __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 499 + } 500 + rreq->transferred += subreq->transferred; 501 + } 502 + 503 + /* Deal with retry requests, short reads and errors. If we retry 504 + * but don't make progress, we abandon the attempt. 505 + */ 506 + if (!error && subreq->transferred < subreq->len) { 507 + if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) { 508 + trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof); 509 + } else { 510 + trace_netfs_sreq(subreq, netfs_sreq_trace_short); 511 + if (subreq->transferred > subreq->consumed) { 512 + __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 513 + __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 514 + set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags); 515 + } else if (!__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { 516 + __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 517 + set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags); 518 + } else { 519 + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 520 + error = -ENODATA; 521 + } 522 + } 523 + } 524 + 525 + subreq->error = error; 526 + trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 527 + 528 + if (unlikely(error < 0)) { 529 + trace_netfs_failure(rreq, subreq, error, netfs_fail_read); 530 + if (subreq->source == NETFS_READ_FROM_CACHE) { 531 + netfs_stat(&netfs_n_rh_read_failed); 532 + } else { 533 + netfs_stat(&netfs_n_rh_download_failed); 534 + set_bit(NETFS_RREQ_FAILED, &rreq->flags); 535 + 
rreq->error = subreq->error; 536 + } 537 + } 538 + 539 + if (atomic_dec_and_test(&rreq->nr_outstanding)) 540 + netfs_rreq_terminated(rreq, was_async); 541 + 542 + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 543 + } 544 + EXPORT_SYMBOL(netfs_read_subreq_terminated);
+264
fs/netfs/read_pgpriv2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Read with PG_private_2 [DEPRECATED]. 3 + * 4 + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 5 + * Written by David Howells (dhowells@redhat.com) 6 + */ 7 + 8 + #include <linux/export.h> 9 + #include <linux/fs.h> 10 + #include <linux/mm.h> 11 + #include <linux/pagemap.h> 12 + #include <linux/slab.h> 13 + #include <linux/task_io_accounting_ops.h> 14 + #include "internal.h" 15 + 16 + /* 17 + * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2. The 18 + * third mark in the folio queue is used to indicate that this folio needs 19 + * writing. 20 + */ 21 + void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq, 22 + struct netfs_io_request *rreq, 23 + struct folio_queue *folioq, 24 + int slot) 25 + { 26 + struct folio *folio = folioq_folio(folioq, slot); 27 + 28 + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 29 + folio_start_private_2(folio); 30 + folioq_mark3(folioq, slot); 31 + } 32 + 33 + /* 34 + * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an 35 + * unrecoverable error. 36 + */ 37 + static void netfs_pgpriv2_cancel(struct folio_queue *folioq) 38 + { 39 + struct folio *folio; 40 + int slot; 41 + 42 + while (folioq) { 43 + if (!folioq->marks3) { 44 + folioq = folioq->next; 45 + continue; 46 + } 47 + 48 + slot = __ffs(folioq->marks3); 49 + folio = folioq_folio(folioq, slot); 50 + 51 + trace_netfs_folio(folio, netfs_folio_trace_cancel_copy); 52 + folio_end_private_2(folio); 53 + folioq_unmark3(folioq, slot); 54 + } 55 + } 56 + 57 + /* 58 + * [DEPRECATED] Copy a folio to the cache with PG_private_2 set. 
59 + */ 60 + static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio) 61 + { 62 + struct netfs_io_stream *cache = &wreq->io_streams[1]; 63 + size_t fsize = folio_size(folio), flen = fsize; 64 + loff_t fpos = folio_pos(folio), i_size; 65 + bool to_eof = false; 66 + 67 + _enter(""); 68 + 69 + /* netfs_perform_write() may shift i_size around the page or from out 70 + * of the page to beyond it, but cannot move i_size into or through the 71 + * page since we have it locked. 72 + */ 73 + i_size = i_size_read(wreq->inode); 74 + 75 + if (fpos >= i_size) { 76 + /* mmap beyond eof. */ 77 + _debug("beyond eof"); 78 + folio_end_private_2(folio); 79 + return 0; 80 + } 81 + 82 + if (fpos + fsize > wreq->i_size) 83 + wreq->i_size = i_size; 84 + 85 + if (flen > i_size - fpos) { 86 + flen = i_size - fpos; 87 + to_eof = true; 88 + } else if (flen == i_size - fpos) { 89 + to_eof = true; 90 + } 91 + 92 + _debug("folio %zx %zx", flen, fsize); 93 + 94 + trace_netfs_folio(folio, netfs_folio_trace_store_copy); 95 + 96 + /* Attach the folio to the rolling buffer. */ 97 + if (netfs_buffer_append_folio(wreq, folio, false) < 0) 98 + return -ENOMEM; 99 + 100 + cache->submit_max_len = fsize; 101 + cache->submit_off = 0; 102 + cache->submit_len = flen; 103 + 104 + /* Attach the folio to one or more subrequests. For a big folio, we 105 + * could end up with thousands of subrequests if the wsize is small - 106 + * but we might need to wait during the creation of subrequests for 107 + * network resources (eg. SMB credits). 
108 + */ 109 + do { 110 + ssize_t part; 111 + 112 + wreq->io_iter.iov_offset = cache->submit_off; 113 + 114 + atomic64_set(&wreq->issued_to, fpos + cache->submit_off); 115 + part = netfs_advance_write(wreq, cache, fpos + cache->submit_off, 116 + cache->submit_len, to_eof); 117 + cache->submit_off += part; 118 + cache->submit_max_len -= part; 119 + if (part > cache->submit_len) 120 + cache->submit_len = 0; 121 + else 122 + cache->submit_len -= part; 123 + } while (cache->submit_len > 0); 124 + 125 + wreq->io_iter.iov_offset = 0; 126 + iov_iter_advance(&wreq->io_iter, fsize); 127 + atomic64_set(&wreq->issued_to, fpos + fsize); 128 + 129 + if (flen < fsize) 130 + netfs_issue_write(wreq, cache); 131 + 132 + _leave(" = 0"); 133 + return 0; 134 + } 135 + 136 + /* 137 + * [DEPRECATED] Go through the buffer and write any folios that are marked with 138 + * the third mark to the cache. 139 + */ 140 + void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq) 141 + { 142 + struct netfs_io_request *wreq; 143 + struct folio_queue *folioq; 144 + struct folio *folio; 145 + int error = 0; 146 + int slot = 0; 147 + 148 + _enter(""); 149 + 150 + if (!fscache_resources_valid(&rreq->cache_resources)) 151 + goto couldnt_start; 152 + 153 + /* Need the first folio to be able to set up the op. 
*/ 154 + for (folioq = rreq->buffer; folioq; folioq = folioq->next) { 155 + if (folioq->marks3) { 156 + slot = __ffs(folioq->marks3); 157 + break; 158 + } 159 + } 160 + if (!folioq) 161 + return; 162 + folio = folioq_folio(folioq, slot); 163 + 164 + wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio), 165 + NETFS_PGPRIV2_COPY_TO_CACHE); 166 + if (IS_ERR(wreq)) { 167 + kleave(" [create %ld]", PTR_ERR(wreq)); 168 + goto couldnt_start; 169 + } 170 + 171 + trace_netfs_write(wreq, netfs_write_trace_copy_to_cache); 172 + netfs_stat(&netfs_n_wh_copy_to_cache); 173 + 174 + for (;;) { 175 + error = netfs_pgpriv2_copy_folio(wreq, folio); 176 + if (error < 0) 177 + break; 178 + 179 + folioq_unmark3(folioq, slot); 180 + if (!folioq->marks3) { 181 + folioq = folioq->next; 182 + if (!folioq) 183 + break; 184 + } 185 + 186 + slot = __ffs(folioq->marks3); 187 + folio = folioq_folio(folioq, slot); 188 + } 189 + 190 + netfs_issue_write(wreq, &wreq->io_streams[1]); 191 + smp_wmb(); /* Write lists before ALL_QUEUED. */ 192 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 193 + 194 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 195 + _leave(" = %d", error); 196 + couldnt_start: 197 + netfs_pgpriv2_cancel(rreq->buffer); 198 + } 199 + 200 + /* 201 + * [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished 202 + * copying. 
203 + */ 204 + bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq) 205 + { 206 + struct folio_queue *folioq = wreq->buffer; 207 + unsigned long long collected_to = wreq->collected_to; 208 + unsigned int slot = wreq->buffer_head_slot; 209 + bool made_progress = false; 210 + 211 + if (slot >= folioq_nr_slots(folioq)) { 212 + folioq = netfs_delete_buffer_head(wreq); 213 + slot = 0; 214 + } 215 + 216 + for (;;) { 217 + struct folio *folio; 218 + unsigned long long fpos, fend; 219 + size_t fsize, flen; 220 + 221 + folio = folioq_folio(folioq, slot); 222 + if (WARN_ONCE(!folio_test_private_2(folio), 223 + "R=%08x: folio %lx is not marked private_2\n", 224 + wreq->debug_id, folio->index)) 225 + trace_netfs_folio(folio, netfs_folio_trace_not_under_wback); 226 + 227 + fpos = folio_pos(folio); 228 + fsize = folio_size(folio); 229 + flen = fsize; 230 + 231 + fend = min_t(unsigned long long, fpos + flen, wreq->i_size); 232 + 233 + trace_netfs_collect_folio(wreq, folio, fend, collected_to); 234 + 235 + /* Unlock any folio we've transferred all of. */ 236 + if (collected_to < fend) 237 + break; 238 + 239 + trace_netfs_folio(folio, netfs_folio_trace_end_copy); 240 + folio_end_private_2(folio); 241 + wreq->cleaned_to = fpos + fsize; 242 + made_progress = true; 243 + 244 + /* Clean up the head folioq. If we clear an entire folioq, then 245 + * we can get rid of it provided it's not also the tail folioq 246 + * being filled by the issuer. 247 + */ 248 + folioq_clear(folioq, slot); 249 + slot++; 250 + if (slot >= folioq_nr_slots(folioq)) { 251 + if (READ_ONCE(wreq->buffer_tail) == folioq) 252 + break; 253 + folioq = netfs_delete_buffer_head(wreq); 254 + slot = 0; 255 + } 256 + 257 + if (fpos + fsize >= collected_to) 258 + break; 259 + } 260 + 261 + wreq->buffer = folioq; 262 + wreq->buffer_head_slot = slot; 263 + return made_progress; 264 + }
+256
fs/netfs/read_retry.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Network filesystem read subrequest retrying. 3 + * 4 + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 5 + * Written by David Howells (dhowells@redhat.com) 6 + */ 7 + 8 + #include <linux/fs.h> 9 + #include <linux/slab.h> 10 + #include "internal.h" 11 + 12 + static void netfs_reissue_read(struct netfs_io_request *rreq, 13 + struct netfs_io_subrequest *subreq) 14 + { 15 + struct iov_iter *io_iter = &subreq->io_iter; 16 + 17 + if (iov_iter_is_folioq(io_iter)) { 18 + subreq->curr_folioq = (struct folio_queue *)io_iter->folioq; 19 + subreq->curr_folioq_slot = io_iter->folioq_slot; 20 + subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot]; 21 + } 22 + 23 + atomic_inc(&rreq->nr_outstanding); 24 + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 25 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 26 + subreq->rreq->netfs_ops->issue_read(subreq); 27 + } 28 + 29 + /* 30 + * Go through the list of failed/short reads, retrying all retryable ones. We 31 + * need to switch failed cache reads to network downloads. 32 + */ 33 + static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) 34 + { 35 + struct netfs_io_subrequest *subreq; 36 + struct netfs_io_stream *stream0 = &rreq->io_streams[0]; 37 + LIST_HEAD(sublist); 38 + LIST_HEAD(queue); 39 + 40 + _enter("R=%x", rreq->debug_id); 41 + 42 + if (list_empty(&rreq->subrequests)) 43 + return; 44 + 45 + if (rreq->netfs_ops->retry_request) 46 + rreq->netfs_ops->retry_request(rreq, NULL); 47 + 48 + /* If there's no renegotiation to do, just resend each retryable subreq 49 + * up to the first permanently failed one. 
50 + */ 51 + if (!rreq->netfs_ops->prepare_read && 52 + !test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) { 53 + struct netfs_io_subrequest *subreq; 54 + 55 + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 56 + if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 57 + break; 58 + if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { 59 + netfs_reset_iter(subreq); 60 + netfs_reissue_read(rreq, subreq); 61 + } 62 + } 63 + return; 64 + } 65 + 66 + /* Okay, we need to renegotiate all the download requests and flip any 67 + * failed cache reads over to being download requests and negotiate 68 + * those also. All fully successful subreqs have been removed from the 69 + * list and any spare data from those has been donated. 70 + * 71 + * What we do is decant the list and rebuild it one subreq at a time so 72 + * that we don't end up with donations jumping over a gap we're busy 73 + * populating with smaller subrequests. In the event that the subreq 74 + * we just launched finishes before we insert the next subreq, it'll 75 + * fill in rreq->prev_donated instead. 76 + 77 + * Note: Alternatively, we could split the tail subrequest right before 78 + * we reissue it and fix up the donations under lock. 79 + */ 80 + list_splice_init(&rreq->subrequests, &queue); 81 + 82 + do { 83 + struct netfs_io_subrequest *from; 84 + struct iov_iter source; 85 + unsigned long long start, len; 86 + size_t part, deferred_next_donated = 0; 87 + bool boundary = false; 88 + 89 + /* Go through the subreqs and find the next span of contiguous 90 + * buffer that we then rejig (cifs, for example, needs the 91 + * rsize renegotiating) and reissue. 
92 + */ 93 + from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link); 94 + list_move_tail(&from->rreq_link, &sublist); 95 + start = from->start + from->transferred; 96 + len = from->len - from->transferred; 97 + 98 + _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx", 99 + rreq->debug_id, from->debug_index, 100 + from->start, from->consumed, from->transferred, from->len); 101 + 102 + if (test_bit(NETFS_SREQ_FAILED, &from->flags) || 103 + !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) 104 + goto abandon; 105 + 106 + deferred_next_donated = from->next_donated; 107 + while ((subreq = list_first_entry_or_null( 108 + &queue, struct netfs_io_subrequest, rreq_link))) { 109 + if (subreq->start != start + len || 110 + subreq->transferred > 0 || 111 + !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) 112 + break; 113 + list_move_tail(&subreq->rreq_link, &sublist); 114 + len += subreq->len; 115 + deferred_next_donated = subreq->next_donated; 116 + if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags)) 117 + break; 118 + } 119 + 120 + _debug(" - range: %llx-%llx %llx", start, start + len - 1, len); 121 + 122 + /* Determine the set of buffers we're going to use. Each 123 + * subreq gets a subset of a single overall contiguous buffer. 124 + */ 125 + netfs_reset_iter(from); 126 + source = from->io_iter; 127 + source.count = len; 128 + 129 + /* Work through the sublist. 
*/ 130 + while ((subreq = list_first_entry_or_null( 131 + &sublist, struct netfs_io_subrequest, rreq_link))) { 132 + list_del(&subreq->rreq_link); 133 + 134 + subreq->source = NETFS_DOWNLOAD_FROM_SERVER; 135 + subreq->start = start - subreq->transferred; 136 + subreq->len = len + subreq->transferred; 137 + stream0->sreq_max_len = subreq->len; 138 + 139 + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 140 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 141 + 142 + spin_lock_bh(&rreq->lock); 143 + list_add_tail(&subreq->rreq_link, &rreq->subrequests); 144 + subreq->prev_donated += rreq->prev_donated; 145 + rreq->prev_donated = 0; 146 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 147 + spin_unlock_bh(&rreq->lock); 148 + 149 + BUG_ON(!len); 150 + 151 + /* Renegotiate max_len (rsize) */ 152 + if (rreq->netfs_ops->prepare_read(subreq) < 0) { 153 + trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed); 154 + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 155 + } 156 + 157 + part = umin(len, stream0->sreq_max_len); 158 + if (unlikely(rreq->io_streams[0].sreq_max_segs)) 159 + part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs); 160 + subreq->len = subreq->transferred + part; 161 + subreq->io_iter = source; 162 + iov_iter_truncate(&subreq->io_iter, part); 163 + iov_iter_advance(&source, part); 164 + len -= part; 165 + start += part; 166 + if (!len) { 167 + if (boundary) 168 + __set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); 169 + subreq->next_donated = deferred_next_donated; 170 + } else { 171 + __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); 172 + subreq->next_donated = 0; 173 + } 174 + 175 + netfs_reissue_read(rreq, subreq); 176 + if (!len) 177 + break; 178 + 179 + /* If we ran out of subrequests, allocate another. 
*/ 180 + if (list_empty(&sublist)) { 181 + subreq = netfs_alloc_subrequest(rreq); 182 + if (!subreq) 183 + goto abandon; 184 + subreq->source = NETFS_DOWNLOAD_FROM_SERVER; 185 + subreq->start = start; 186 + 187 + /* We get two refs, but need just one. */ 188 + netfs_put_subrequest(subreq, false, netfs_sreq_trace_new); 189 + trace_netfs_sreq(subreq, netfs_sreq_trace_split); 190 + list_add_tail(&subreq->rreq_link, &sublist); 191 + } 192 + } 193 + 194 + /* If we managed to use fewer subreqs, we can discard the 195 + * excess. 196 + */ 197 + while ((subreq = list_first_entry_or_null( 198 + &sublist, struct netfs_io_subrequest, rreq_link))) { 199 + trace_netfs_sreq(subreq, netfs_sreq_trace_discard); 200 + list_del(&subreq->rreq_link); 201 + netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); 202 + } 203 + 204 + } while (!list_empty(&queue)); 205 + 206 + return; 207 + 208 + /* If we hit ENOMEM, fail all remaining subrequests */ 209 + abandon: 210 + list_splice_init(&sublist, &queue); 211 + list_for_each_entry(subreq, &queue, rreq_link) { 212 + if (!subreq->error) 213 + subreq->error = -ENOMEM; 214 + __clear_bit(NETFS_SREQ_FAILED, &subreq->flags); 215 + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 216 + __clear_bit(NETFS_SREQ_RETRYING, &subreq->flags); 217 + } 218 + spin_lock_bh(&rreq->lock); 219 + list_splice_tail_init(&queue, &rreq->subrequests); 220 + spin_unlock_bh(&rreq->lock); 221 + } 222 + 223 + /* 224 + * Retry reads. 225 + */ 226 + void netfs_retry_reads(struct netfs_io_request *rreq) 227 + { 228 + trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); 229 + 230 + atomic_inc(&rreq->nr_outstanding); 231 + 232 + netfs_retry_read_subrequests(rreq); 233 + 234 + if (atomic_dec_and_test(&rreq->nr_outstanding)) 235 + netfs_rreq_terminated(rreq, false); 236 + } 237 + 238 + /* 239 + * Unlock any the pages that haven't been unlocked yet due to abandoned 240 + * subrequests. 
241 + */ 242 + void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq) 243 + { 244 + struct folio_queue *p; 245 + 246 + for (p = rreq->buffer; p; p = p->next) { 247 + for (int slot = 0; slot < folioq_count(p); slot++) { 248 + struct folio *folio = folioq_folio(p, slot); 249 + 250 + if (folio && !folioq_is_marked2(p, slot)) { 251 + trace_netfs_folio(folio, netfs_folio_trace_abandon); 252 + folio_unlock(folio); 253 + } 254 + } 255 + } 256 + }
+4 -2
fs/netfs/stats.c
··· 32 32 atomic_t netfs_n_wh_writethrough; 33 33 atomic_t netfs_n_wh_dio_write; 34 34 atomic_t netfs_n_wh_writepages; 35 + atomic_t netfs_n_wh_copy_to_cache; 35 36 atomic_t netfs_n_wh_wstream_conflict; 36 37 atomic_t netfs_n_wh_upload; 37 38 atomic_t netfs_n_wh_upload_done; ··· 52 51 atomic_read(&netfs_n_rh_read_folio), 53 52 atomic_read(&netfs_n_rh_write_begin), 54 53 atomic_read(&netfs_n_rh_write_zskip)); 55 - seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u\n", 54 + seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u 2C=%u\n", 56 55 atomic_read(&netfs_n_wh_buffered_write), 57 56 atomic_read(&netfs_n_wh_writethrough), 58 57 atomic_read(&netfs_n_wh_dio_write), 59 - atomic_read(&netfs_n_wh_writepages)); 58 + atomic_read(&netfs_n_wh_writepages), 59 + atomic_read(&netfs_n_wh_copy_to_cache)); 60 60 seq_printf(m, "ZeroOps: ZR=%u sh=%u sk=%u\n", 61 61 atomic_read(&netfs_n_rh_zero), 62 62 atomic_read(&netfs_n_rh_short_read),
+8 -1
fs/netfs/write_collect.c
··· 87 87 unsigned long long collected_to = wreq->collected_to; 88 88 unsigned int slot = wreq->buffer_head_slot; 89 89 90 + if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) { 91 + if (netfs_pgpriv2_unlock_copied_folios(wreq)) 92 + *notes |= MADE_PROGRESS; 93 + return; 94 + } 95 + 90 96 if (slot >= folioq_nr_slots(folioq)) { 91 97 folioq = netfs_delete_buffer_head(wreq); 92 98 slot = 0; ··· 389 383 smp_rmb(); 390 384 collected_to = ULLONG_MAX; 391 385 if (wreq->origin == NETFS_WRITEBACK || 392 - wreq->origin == NETFS_WRITETHROUGH) 386 + wreq->origin == NETFS_WRITETHROUGH || 387 + wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) 393 388 notes = BUFFERED; 394 389 else 395 390 notes = 0;
+7 -10
fs/netfs/write_issue.c
··· 95 95 struct netfs_io_request *wreq; 96 96 struct netfs_inode *ictx; 97 97 bool is_buffered = (origin == NETFS_WRITEBACK || 98 - origin == NETFS_WRITETHROUGH); 98 + origin == NETFS_WRITETHROUGH || 99 + origin == NETFS_PGPRIV2_COPY_TO_CACHE); 99 100 100 101 wreq = netfs_alloc_request(mapping, file, start, 0, origin); 101 102 if (IS_ERR(wreq)) ··· 161 160 subreq->io_iter = wreq->io_iter; 162 161 163 162 _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); 164 - 165 - trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 166 - refcount_read(&subreq->ref), 167 - netfs_sreq_trace_new); 168 163 169 164 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); 170 165 ··· 238 241 netfs_do_issue_write(stream, subreq); 239 242 } 240 243 241 - static void netfs_issue_write(struct netfs_io_request *wreq, 242 - struct netfs_io_stream *stream) 244 + void netfs_issue_write(struct netfs_io_request *wreq, 245 + struct netfs_io_stream *stream) 243 246 { 244 247 struct netfs_io_subrequest *subreq = stream->construct; 245 248 ··· 256 259 * we can avoid overrunning the credits obtained (cifs) and try to parallelise 257 260 * content-crypto preparation with network writes. 258 261 */ 259 - static int netfs_advance_write(struct netfs_io_request *wreq, 260 - struct netfs_io_stream *stream, 261 - loff_t start, size_t len, bool to_eof) 262 + int netfs_advance_write(struct netfs_io_request *wreq, 263 + struct netfs_io_stream *stream, 264 + loff_t start, size_t len, bool to_eof) 262 265 { 263 266 struct netfs_io_subrequest *subreq = stream->construct; 264 267 size_t part;
+6 -13
fs/nfs/fscache.c
··· 267 267 rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); 268 268 /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ 269 269 __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); 270 + rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize; 270 271 271 272 return 0; 272 273 } ··· 289 288 return netfs; 290 289 } 291 290 292 - static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq) 293 - { 294 - size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize; 295 - 296 - sreq->len = min(sreq->len, rsize); 297 - return true; 298 - } 299 - 300 291 static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) 301 292 { 302 293 struct nfs_netfs_io_data *netfs; ··· 297 304 struct nfs_open_context *ctx = sreq->rreq->netfs_priv; 298 305 struct page *page; 299 306 unsigned long idx; 307 + pgoff_t start, last; 300 308 int err; 301 - pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; 302 - pgoff_t last = ((sreq->start + sreq->len - 303 - sreq->transferred - 1) >> PAGE_SHIFT); 309 + 310 + start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; 311 + last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT); 304 312 305 313 nfs_pageio_init_read(&pgio, inode, false, 306 314 &nfs_async_read_completion_ops); 307 315 308 316 netfs = nfs_netfs_alloc(sreq); 309 317 if (!netfs) 310 - return netfs_subreq_terminated(sreq, -ENOMEM, false); 318 + return netfs_read_subreq_terminated(sreq, -ENOMEM, false); 311 319 312 320 pgio.pg_netfs = netfs; /* used in completion */ 313 321 ··· 374 380 .init_request = nfs_netfs_init_request, 375 381 .free_request = nfs_netfs_free_request, 376 382 .issue_read = nfs_netfs_issue_read, 377 - .clamp_length = nfs_netfs_clamp_length 378 383 };
+3 -4
fs/nfs/fscache.h
··· 60 60 61 61 static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) 62 62 { 63 - ssize_t final_len; 64 - 65 63 /* Only the last RPC completion should call netfs_subreq_terminated() */ 66 64 if (!refcount_dec_and_test(&netfs->refcount)) 67 65 return; ··· 72 74 * Correct the final length here to be no larger than the netfs subrequest 73 75 * length, and thus avoid netfs's "Subreq overread" warning message. 74 76 */ 75 - final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred)); 76 - netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false); 77 + netfs->sreq->transferred = min_t(s64, netfs->sreq->len, 78 + atomic64_read(&netfs->transferred)); 79 + netfs_read_subreq_terminated(netfs->sreq, netfs->error, false); 77 80 kfree(netfs); 78 81 } 79 82 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
-1
fs/smb/client/cifsglob.h
··· 1485 1485 struct cifs_io_request *req; 1486 1486 }; 1487 1487 ssize_t got_bytes; 1488 - size_t actual_len; 1489 1488 unsigned int xid; 1490 1489 int result; 1491 1490 bool have_xid;
+2 -4
fs/smb/client/cifssmb.c
··· 1309 1309 if (rdata->result == 0 || rdata->result == -EAGAIN) 1310 1310 iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes); 1311 1311 rdata->credits.value = 0; 1312 - netfs_subreq_terminated(&rdata->subreq, 1313 - (rdata->result == 0 || rdata->result == -EAGAIN) ? 1314 - rdata->got_bytes : rdata->result, 1315 - false); 1312 + rdata->subreq.transferred += rdata->got_bytes; 1313 + netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false); 1316 1314 release_mid(mid); 1317 1315 add_credits(server, &credits, 0); 1318 1316 }
+28 -54
fs/smb/client/file.c
··· 112 112 goto fail; 113 113 } 114 114 115 - wdata->actual_len = wdata->subreq.len; 116 115 rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust); 117 116 if (rc) 118 117 goto fail; ··· 140 141 } 141 142 142 143 /* 143 - * Split the read up according to how many credits we can get for each piece. 144 - * It's okay to sleep here if we need to wait for more credit to become 145 - * available. 146 - * 147 - * We also choose the server and allocate an operation ID to be cleaned up 148 - * later. 144 + * Negotiate the size of a read operation on behalf of the netfs library. 149 145 */ 150 - static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) 146 + static int cifs_prepare_read(struct netfs_io_subrequest *subreq) 151 147 { 152 148 struct netfs_io_request *rreq = subreq->rreq; 153 - struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr]; 154 149 struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); 155 150 struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); 156 151 struct TCP_Server_Info *server = req->server; 157 152 struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 158 - int rc; 153 + size_t size; 154 + int rc = 0; 159 155 160 - rdata->xid = get_xid(); 161 - rdata->have_xid = true; 156 + if (!rdata->have_xid) { 157 + rdata->xid = get_xid(); 158 + rdata->have_xid = true; 159 + } 162 160 rdata->server = server; 163 161 164 162 if (cifs_sb->ctx->rsize == 0) ··· 163 167 server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), 164 168 cifs_sb->ctx); 165 169 166 - 167 170 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 168 - &stream->sreq_max_len, &rdata->credits); 169 - if (rc) { 170 - subreq->error = rc; 171 - return false; 172 - } 171 + &size, &rdata->credits); 172 + if (rc) 173 + return rc; 174 + 175 + rreq->io_streams[0].sreq_max_len = size; 173 176 174 177 rdata->credits.in_flight_check = 1; 175 178 
rdata->credits.rreq_debug_id = rreq->debug_id; ··· 180 185 server->credits, server->in_flight, 0, 181 186 cifs_trace_rw_credits_read_submit); 182 187 183 - subreq->len = umin(subreq->len, stream->sreq_max_len); 184 - rdata->actual_len = subreq->len; 185 - 186 188 #ifdef CONFIG_CIFS_SMB_DIRECT 187 189 if (server->smbd_conn) 188 - stream->sreq_max_segs = server->smbd_conn->max_frmr_depth; 190 + rreq->io_streams[0].sreq_max_segs = server->smbd_conn->max_frmr_depth; 189 191 #endif 190 - return true; 192 + return 0; 191 193 } 192 194 193 195 /* ··· 193 201 * to only read a portion of that, but as long as we read something, the netfs 194 202 * helper will call us again so that we can issue another read. 195 203 */ 196 - static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) 204 + static void cifs_issue_read(struct netfs_io_subrequest *subreq) 197 205 { 198 206 struct netfs_io_request *rreq = subreq->rreq; 199 207 struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); 200 208 struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); 201 209 struct TCP_Server_Info *server = req->server; 202 - struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 203 210 int rc = 0; 204 211 205 212 cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", 206 213 __func__, rreq->debug_id, subreq->debug_index, rreq->mapping, 207 214 subreq->transferred, subreq->len); 208 215 209 - if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) { 210 - /* 211 - * As we're issuing a retry, we need to negotiate some new 212 - * credits otherwise the server may reject the op with 213 - * INVALID_PARAMETER. Note, however, we may get back less 214 - * credit than we need to complete the op, in which case, we 215 - * shorten the op and rely on additional rounds of retry. 
216 - */ 217 - size_t rsize = umin(subreq->len - subreq->transferred, 218 - cifs_sb->ctx->rsize); 219 - 220 - rc = server->ops->wait_mtu_credits(server, rsize, &rdata->actual_len, 221 - &rdata->credits); 222 - if (rc) 223 - goto out; 224 - 225 - rdata->credits.in_flight_check = 1; 226 - 227 - trace_smb3_rw_credits(rdata->rreq->debug_id, 228 - rdata->subreq.debug_index, 229 - rdata->credits.value, 230 - server->credits, server->in_flight, 0, 231 - cifs_trace_rw_credits_read_resubmit); 232 - } 216 + rc = adjust_credits(server, rdata, cifs_trace_rw_credits_issue_read_adjust); 217 + if (rc) 218 + goto failed; 233 219 234 220 if (req->cfile->invalidHandle) { 235 221 do { 236 222 rc = cifs_reopen_file(req->cfile, true); 237 223 } while (rc == -EAGAIN); 238 224 if (rc) 239 - goto out; 225 + goto failed; 240 226 } 241 227 242 228 if (subreq->rreq->origin != NETFS_DIO_READ) 243 229 __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 244 230 231 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 245 232 rc = rdata->server->ops->async_readv(rdata); 246 - out: 247 233 if (rc) 248 - netfs_subreq_terminated(subreq, rc, false); 234 + goto failed; 235 + return; 236 + 237 + failed: 238 + netfs_read_subreq_terminated(subreq, rc, false); 249 239 } 250 240 251 241 /* ··· 338 364 .init_request = cifs_init_request, 339 365 .free_request = cifs_free_request, 340 366 .free_subrequest = cifs_free_subrequest, 341 - .clamp_length = cifs_clamp_length, 342 - .issue_read = cifs_req_issue_read, 367 + .prepare_read = cifs_prepare_read, 368 + .issue_read = cifs_issue_read, 343 369 .done = cifs_rreq_done, 344 370 .begin_writeback = cifs_begin_writeback, 345 371 .prepare_write = cifs_prepare_write,
+2 -1
fs/smb/client/smb2ops.c
··· 301 301 unsigned int /*enum smb3_rw_credits_trace*/ trace) 302 302 { 303 303 struct cifs_credits *credits = &subreq->credits; 304 - int new_val = DIV_ROUND_UP(subreq->actual_len, SMB2_MAX_BUFFER_SIZE); 304 + int new_val = DIV_ROUND_UP(subreq->subreq.len - subreq->subreq.transferred, 305 + SMB2_MAX_BUFFER_SIZE); 305 306 int scredits, in_flight; 306 307 307 308 if (!credits->value || credits->value == new_val)
+16 -11
fs/smb/client/smb2pdu.c
··· 4498 4498 struct cifs_io_subrequest *rdata = 4499 4499 container_of(work, struct cifs_io_subrequest, subreq.work); 4500 4500 4501 - netfs_subreq_terminated(&rdata->subreq, 4502 - (rdata->result == 0 || rdata->result == -EAGAIN) ? 4503 - rdata->got_bytes : rdata->result, true); 4501 + netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false); 4504 4502 } 4505 4503 4506 4504 static void ··· 4530 4532 4531 4533 cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu/%zu\n", 4532 4534 __func__, mid->mid, mid->mid_state, rdata->result, 4533 - rdata->actual_len, rdata->subreq.len - rdata->subreq.transferred); 4535 + rdata->got_bytes, rdata->subreq.len - rdata->subreq.transferred); 4534 4536 4535 4537 switch (mid->mid_state) { 4536 4538 case MID_RESPONSE_RECEIVED: ··· 4552 4554 break; 4553 4555 case MID_REQUEST_SUBMITTED: 4554 4556 case MID_RETRY_NEEDED: 4557 + __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); 4555 4558 rdata->result = -EAGAIN; 4556 4559 if (server->sign && rdata->got_bytes) 4557 4560 /* reset bytes number since we can not check a sign */ ··· 4587 4588 rdata->req->cfile->fid.persistent_fid, 4588 4589 tcon->tid, tcon->ses->Suid, 4589 4590 rdata->subreq.start + rdata->subreq.transferred, 4590 - rdata->actual_len, 4591 + rdata->subreq.len - rdata->subreq.transferred, 4591 4592 rdata->result); 4592 4593 } else 4593 4594 trace_smb3_read_done(rdata->rreq->debug_id, ··· 4602 4603 __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); 4603 4604 rdata->result = 0; 4604 4605 } else { 4605 - if (rdata->got_bytes < rdata->actual_len && 4606 - rdata->subreq.start + rdata->subreq.transferred + rdata->got_bytes == 4607 - ictx->remote_i_size) { 4606 + size_t trans = rdata->subreq.transferred + rdata->got_bytes; 4607 + if (trans < rdata->subreq.len && 4608 + rdata->subreq.start + trans == ictx->remote_i_size) { 4608 4609 __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); 4609 4610 rdata->result = 0; 4610 4611 } ··· 4613 4614 server->credits, 
server->in_flight, 4614 4615 0, cifs_trace_rw_credits_read_response_clear); 4615 4616 rdata->credits.value = 0; 4617 + rdata->subreq.transferred += rdata->got_bytes; 4618 + if (rdata->subreq.start + rdata->subreq.transferred >= rdata->subreq.rreq->i_size) 4619 + __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); 4620 + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); 4616 4621 INIT_WORK(&rdata->subreq.work, smb2_readv_worker); 4617 4622 queue_work(cifsiod_wq, &rdata->subreq.work); 4618 4623 release_mid(mid); ··· 4651 4648 io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink); 4652 4649 io_parms.server = server = rdata->server; 4653 4650 io_parms.offset = subreq->start + subreq->transferred; 4654 - io_parms.length = rdata->actual_len; 4651 + io_parms.length = subreq->len - subreq->transferred; 4655 4652 io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; 4656 4653 io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; 4657 4654 io_parms.pid = rdata->req->pid; ··· 4672 4669 shdr = (struct smb2_hdr *)buf; 4673 4670 4674 4671 if (rdata->credits.value > 0) { 4675 - shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->actual_len, 4672 + shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(io_parms.length, 4676 4673 SMB2_MAX_BUFFER_SIZE)); 4677 4674 credit_request = le16_to_cpu(shdr->CreditCharge) + 8; 4678 4675 if (server->credits >= server->max_credits) ··· 4700 4697 rdata->xid, io_parms.persistent_fid, 4701 4698 io_parms.tcon->tid, 4702 4699 io_parms.tcon->ses->Suid, 4703 - io_parms.offset, rdata->actual_len, rc); 4700 + io_parms.offset, 4701 + subreq->len - subreq->transferred, rc); 4704 4702 } 4705 4703 4706 4704 async_readv_out: ··· 4884 4880 server->credits, server->in_flight, 4885 4881 0, cifs_trace_rw_credits_write_response_clear); 4886 4882 wdata->credits.value = 0; 4883 + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); 4887 4884 cifs_write_subrequest_terminated(wdata, result ?: written, true); 4888 4885 
release_mid(mid); 4889 4886 trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
+18
include/linux/folio_queue.h
··· 27 27 struct folio_queue *prev; /* Previous queue segment or NULL */ 28 28 unsigned long marks; /* 1-bit mark per folio */ 29 29 unsigned long marks2; /* Second 1-bit mark per folio */ 30 + unsigned long marks3; /* Third 1-bit mark per folio */ 30 31 #if PAGEVEC_SIZE > BITS_PER_LONG 31 32 #error marks is not big enough 32 33 #endif ··· 40 39 folioq->prev = NULL; 41 40 folioq->marks = 0; 42 41 folioq->marks2 = 0; 42 + folioq->marks3 = 0; 43 43 } 44 44 45 45 static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) ··· 89 87 clear_bit(slot, &folioq->marks2); 90 88 } 91 89 90 + static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) 91 + { 92 + return test_bit(slot, &folioq->marks3); 93 + } 94 + 95 + static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) 96 + { 97 + set_bit(slot, &folioq->marks3); 98 + } 99 + 100 + static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) 101 + { 102 + clear_bit(slot, &folioq->marks3); 103 + } 104 + 92 105 static inline unsigned int __folio_order(struct folio *folio) 93 106 { 94 107 if (!folio_test_large(folio)) ··· 150 133 folioq->vec.folios[slot] = NULL; 151 134 folioq_unmark(folioq, slot); 152 135 folioq_unmark2(folioq, slot); 136 + folioq_unmark3(folioq, slot); 153 137 } 154 138 155 139 #endif /* _LINUX_FOLIO_QUEUE_H */
+18 -8
include/linux/netfs.h
··· 178 178 unsigned long long start; /* Where to start the I/O */ 179 179 size_t len; /* Size of the I/O */ 180 180 size_t transferred; /* Amount of data transferred */ 181 + size_t consumed; /* Amount of read data consumed */ 182 + size_t prev_donated; /* Amount of data donated from previous subreq */ 183 + size_t next_donated; /* Amount of data donated from next subreq */ 181 184 refcount_t ref; 182 185 short error; /* 0 or error that occurred */ 183 186 unsigned short debug_index; /* Index in list (for debugging output) */ 184 187 unsigned int nr_segs; /* Number of segs in io_iter */ 185 188 enum netfs_io_source source; /* Where to read from/write to */ 186 189 unsigned char stream_nr; /* I/O stream this belongs to */ 190 + unsigned char curr_folioq_slot; /* Folio currently being read */ 191 + unsigned char curr_folio_order; /* Order of folio */ 192 + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ 187 193 unsigned long flags; 188 194 #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ 189 195 #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ 190 - #define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ 191 196 #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ 192 197 #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ 193 198 #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ 194 199 #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. 
ceph object) */ 200 + #define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */ 195 201 #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ 196 202 #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ 197 203 #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ 198 204 #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ 199 - #define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ 200 205 }; 201 206 202 207 enum netfs_io_origin { 203 208 NETFS_READAHEAD, /* This read was triggered by readahead */ 204 209 NETFS_READPAGE, /* This read is a synchronous read */ 210 + NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ 205 211 NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ 206 212 NETFS_DIO_READ, /* This is a direct I/O read */ 207 213 NETFS_WRITEBACK, /* This write was triggered by writepages */ 208 214 NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ 209 215 NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ 210 216 NETFS_DIO_WRITE, /* This is a direct I/O write */ 217 + NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */ 211 218 nr__netfs_io_origin 212 219 } __mode(byte); 213 220 ··· 231 224 struct address_space *mapping; /* The mapping being accessed */ 232 225 struct kiocb *iocb; /* AIO completion vector */ 233 226 struct netfs_cache_resources cache_resources; 227 + struct readahead_control *ractl; /* Readahead descriptor */ 234 228 struct list_head proc_link; /* Link in netfs_iorequests */ 235 229 struct list_head subrequests; /* Contributory I/O operations */ 236 230 struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ ··· 252 244 unsigned int nr_group_rel; /* Number of refs to release on ->group */ 253 245 spinlock_t lock; /* Lock for queuing subreqs */ 254 246 atomic_t nr_outstanding; /* Number of ops in progress */ 255 - atomic_t nr_copy_ops; /* Number of 
copy-to-cache ops in progress */ 256 - size_t upper_len; /* Length can be extended to here */ 257 247 unsigned long long submitted; /* Amount submitted for I/O so far */ 258 248 unsigned long long len; /* Length of the request */ 259 249 size_t transferred; /* Amount to be indicated as transferred */ 260 - short error; /* 0 or error that occurred */ 250 + long error; /* 0 or error that occurred */ 261 251 enum netfs_io_origin origin; /* Origin of the request */ 262 252 bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ 263 253 u8 buffer_head_slot; /* First slot in ->buffer */ ··· 266 260 unsigned long long collected_to; /* Point we've collected to */ 267 261 unsigned long long cleaned_to; /* Position we've cleaned folios to */ 268 262 pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ 263 + size_t prev_donated; /* Fallback for subreq->prev_donated */ 269 264 refcount_t ref; 270 265 unsigned long flags; 271 - #define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ 272 266 #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */ 273 267 #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ 274 268 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ ··· 280 274 #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ 281 275 #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ 282 276 #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ 277 + #define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ 283 278 #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark 284 279 * write to cache on read */ 285 280 const struct netfs_request_ops *netfs_ops; ··· 299 292 300 293 /* Read request handling */ 301 294 void (*expand_readahead)(struct netfs_io_request *rreq); 302 - bool (*clamp_length)(struct netfs_io_subrequest *subreq); 295 + int (*prepare_read)(struct netfs_io_subrequest 
*subreq); 303 296 void (*issue_read)(struct netfs_io_subrequest *subreq); 304 297 bool (*is_still_valid)(struct netfs_io_request *rreq); 305 298 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, ··· 429 422 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); 430 423 431 424 /* (Sub)request management API. */ 432 - void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); 425 + void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, 426 + bool was_async); 427 + void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, 428 + int error, bool was_async); 433 429 void netfs_get_subrequest(struct netfs_io_subrequest *subreq, 434 430 enum netfs_sreq_ref_trace what); 435 431 void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
+98 -5
include/trace/events/netfs.h
··· 20 20 EM(netfs_read_trace_expanded, "EXPANDED ") \ 21 21 EM(netfs_read_trace_readahead, "READAHEAD") \ 22 22 EM(netfs_read_trace_readpage, "READPAGE ") \ 23 + EM(netfs_read_trace_read_gaps, "READ-GAPS") \ 23 24 EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ 24 25 E_(netfs_read_trace_write_begin, "WRITEBEGN") 25 26 ··· 34 33 #define netfs_rreq_origins \ 35 34 EM(NETFS_READAHEAD, "RA") \ 36 35 EM(NETFS_READPAGE, "RP") \ 36 + EM(NETFS_READ_GAPS, "RG") \ 37 37 EM(NETFS_READ_FOR_WRITE, "RW") \ 38 38 EM(NETFS_DIO_READ, "DR") \ 39 39 EM(NETFS_WRITEBACK, "WB") \ 40 40 EM(NETFS_WRITETHROUGH, "WT") \ 41 41 EM(NETFS_UNBUFFERED_WRITE, "UW") \ 42 - E_(NETFS_DIO_WRITE, "DW") 42 + EM(NETFS_DIO_WRITE, "DW") \ 43 + E_(NETFS_PGPRIV2_COPY_TO_CACHE, "2C") 43 44 44 45 #define netfs_rreq_traces \ 45 46 EM(netfs_rreq_trace_assess, "ASSESS ") \ ··· 72 69 E_(NETFS_INVALID_WRITE, "INVL") 73 70 74 71 #define netfs_sreq_traces \ 72 + EM(netfs_sreq_trace_add_donations, "+DON ") \ 73 + EM(netfs_sreq_trace_added, "ADD ") \ 74 + EM(netfs_sreq_trace_clear, "CLEAR") \ 75 75 EM(netfs_sreq_trace_discard, "DSCRD") \ 76 + EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ 77 + EM(netfs_sreq_trace_donate_to_next, "DON-N") \ 76 78 EM(netfs_sreq_trace_download_instead, "RDOWN") \ 77 79 EM(netfs_sreq_trace_fail, "FAIL ") \ 78 80 EM(netfs_sreq_trace_free, "FREE ") \ 81 + EM(netfs_sreq_trace_hit_eof, "EOF ") \ 82 + EM(netfs_sreq_trace_io_progress, "IO ") \ 79 83 EM(netfs_sreq_trace_limited, "LIMIT") \ 80 84 EM(netfs_sreq_trace_prepare, "PREP ") \ 81 85 EM(netfs_sreq_trace_prep_failed, "PRPFL") \ 82 - EM(netfs_sreq_trace_resubmit_short, "SHORT") \ 86 + EM(netfs_sreq_trace_progress, "PRGRS") \ 87 + EM(netfs_sreq_trace_reprep_failed, "REPFL") \ 83 88 EM(netfs_sreq_trace_retry, "RETRY") \ 89 + EM(netfs_sreq_trace_short, "SHORT") \ 90 + EM(netfs_sreq_trace_split, "SPLIT") \ 84 91 EM(netfs_sreq_trace_submit, "SUBMT") \ 85 92 EM(netfs_sreq_trace_terminated, "TERM ") \ 86 93 EM(netfs_sreq_trace_write, 
"WRITE") \ ··· 131 118 EM(netfs_sreq_trace_new, "NEW ") \ 132 119 EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ 133 120 EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ 134 - EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ 121 + EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \ 135 122 EM(netfs_sreq_trace_put_done, "PUT DONE ") \ 136 123 EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ 137 124 EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ ··· 151 138 EM(netfs_flush_content, "flush") \ 152 139 EM(netfs_streaming_filled_page, "mod-streamw-f") \ 153 140 EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ 141 + EM(netfs_folio_trace_abandon, "abandon") \ 154 142 EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ 155 143 EM(netfs_folio_trace_clear, "clear") \ 156 144 EM(netfs_folio_trace_clear_cc, "clear-cc") \ ··· 168 154 EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ 169 155 EM(netfs_folio_trace_not_under_wback, "!wback") \ 170 156 EM(netfs_folio_trace_put, "put") \ 157 + EM(netfs_folio_trace_read, "read") \ 158 + EM(netfs_folio_trace_read_done, "read-done") \ 171 159 EM(netfs_folio_trace_read_gaps, "read-gaps") \ 160 + EM(netfs_folio_trace_read_put, "read-put") \ 161 + EM(netfs_folio_trace_read_unlock, "read-unlock") \ 172 162 EM(netfs_folio_trace_redirtied, "redirtied") \ 173 163 EM(netfs_folio_trace_store, "store") \ 174 164 EM(netfs_folio_trace_store_copy, "store-copy") \ ··· 184 166 EM(netfs_contig_trace_collect, "Collect") \ 185 167 EM(netfs_contig_trace_jump, "-->JUMP-->") \ 186 168 E_(netfs_contig_trace_unlock, "Unlock") 169 + 170 + #define netfs_donate_traces \ 171 + EM(netfs_trace_donate_tail_to_prev, "tail-to-prev") \ 172 + EM(netfs_trace_donate_to_prev, "to-prev") \ 173 + EM(netfs_trace_donate_to_next, "to-next") \ 174 + E_(netfs_trace_donate_to_deferred_next, "defer-next") 187 175 188 176 #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY 189 177 #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY ··· 208 184 enum netfs_sreq_ref_trace { 
netfs_sreq_ref_traces } __mode(byte); 209 185 enum netfs_folio_trace { netfs_folio_traces } __mode(byte); 210 186 enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); 187 + enum netfs_donate_trace { netfs_donate_traces } __mode(byte); 211 188 212 189 #endif 213 190 ··· 231 206 netfs_sreq_ref_traces; 232 207 netfs_folio_traces; 233 208 netfs_collect_contig_traces; 209 + netfs_donate_traces; 234 210 235 211 /* 236 212 * Now redefine the EM() and E_() macros to map the enums to the strings that ··· 252 226 TP_STRUCT__entry( 253 227 __field(unsigned int, rreq ) 254 228 __field(unsigned int, cookie ) 229 + __field(loff_t, i_size ) 255 230 __field(loff_t, start ) 256 231 __field(size_t, len ) 257 232 __field(enum netfs_read_trace, what ) ··· 262 235 TP_fast_assign( 263 236 __entry->rreq = rreq->debug_id; 264 237 __entry->cookie = rreq->cache_resources.debug_id; 238 + __entry->i_size = rreq->i_size; 265 239 __entry->start = start; 266 240 __entry->len = len; 267 241 __entry->what = what; 268 242 __entry->netfs_inode = rreq->inode->i_ino; 269 243 ), 270 244 271 - TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", 245 + TP_printk("R=%08x %s c=%08x ni=%x s=%llx l=%zx sz=%llx", 272 246 __entry->rreq, 273 247 __print_symbolic(__entry->what, netfs_read_traces), 274 248 __entry->cookie, 275 249 __entry->netfs_inode, 276 - __entry->start, __entry->len) 250 + __entry->start, __entry->len, __entry->i_size) 277 251 ); 278 252 279 253 TRACE_EVENT(netfs_rreq, ··· 677 649 TP_printk("R=%08x[%x:] cto=%llx frn=%llx", 678 650 __entry->wreq, __entry->stream, 679 651 __entry->collected_to, __entry->front) 652 + ); 653 + 654 + TRACE_EVENT(netfs_progress, 655 + TP_PROTO(const struct netfs_io_subrequest *subreq, 656 + unsigned long long start, size_t avail, size_t part), 657 + 658 + TP_ARGS(subreq, start, avail, part), 659 + 660 + TP_STRUCT__entry( 661 + __field(unsigned int, rreq) 662 + __field(unsigned int, subreq) 663 + __field(unsigned int, consumed) 664 + 
__field(unsigned int, transferred) 665 + __field(unsigned long long, f_start) 666 + __field(unsigned int, f_avail) 667 + __field(unsigned int, f_part) 668 + __field(unsigned char, slot) 669 + ), 670 + 671 + TP_fast_assign( 672 + __entry->rreq = subreq->rreq->debug_id; 673 + __entry->subreq = subreq->debug_index; 674 + __entry->consumed = subreq->consumed; 675 + __entry->transferred = subreq->transferred; 676 + __entry->f_start = start; 677 + __entry->f_avail = avail; 678 + __entry->f_part = part; 679 + __entry->slot = subreq->curr_folioq_slot; 680 + ), 681 + 682 + TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x", 683 + __entry->rreq, __entry->subreq, __entry->f_start, 684 + __entry->consumed, __entry->transferred, 685 + __entry->f_part, __entry->f_avail, __entry->slot) 686 + ); 687 + 688 + TRACE_EVENT(netfs_donate, 689 + TP_PROTO(const struct netfs_io_request *rreq, 690 + const struct netfs_io_subrequest *from, 691 + const struct netfs_io_subrequest *to, 692 + size_t amount, 693 + enum netfs_donate_trace trace), 694 + 695 + TP_ARGS(rreq, from, to, amount, trace), 696 + 697 + TP_STRUCT__entry( 698 + __field(unsigned int, rreq) 699 + __field(unsigned int, from) 700 + __field(unsigned int, to) 701 + __field(unsigned int, amount) 702 + __field(enum netfs_donate_trace, trace) 703 + ), 704 + 705 + TP_fast_assign( 706 + __entry->rreq = rreq->debug_id; 707 + __entry->from = from->debug_index; 708 + __entry->to = to ? to->debug_index : -1; 709 + __entry->amount = amount; 710 + __entry->trace = trace; 711 + ), 712 + 713 + TP_printk("R=%08x[%02x] -> [%02x] %s am=%x", 714 + __entry->rreq, __entry->from, __entry->to, 715 + __print_symbolic(__entry->trace, netfs_donate_traces), 716 + __entry->amount) 680 717 ); 681 718 682 719 #undef EM