Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cachefiles, netfs: Fix write to partial block at EOF

Because it uses DIO writes, cachefiles is unable to make a write to the
backing file if that write is not aligned to and sized according to the
backing file's DIO block alignment. This makes it tricky to handle a write
to the cache where the EOF on the network file is not correctly aligned.

To get around this, netfslib attempts to tell the driver it is calling how
much more data there is available beyond the EOF that it can use to pad the
write (netfslib preclears the part of the folio above the EOF). However,
netfslib tries to tell the cache what the maximum length is, but doesn't
calculate it correctly; and, in any case, cachefiles actually ignores the
value and just skips the block.

Fix this by:

(1) Change the value passed to indicate the amount of extra data that can
be added to the operation (now ->submit_extendable_to). This is much
simpler to calculate as it's just the end of the folio minus the top
of the data within the folio - rather than having to account for data
spread over multiple folios.

(2) Make cachefiles add some of this data if the subrequest it is given
ends at the network file's i_size and the extra data is sufficient to
pad out to a whole block.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-22-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>

Authored by David Howells and committed by Christian Brauner.
c4f1450e 86b374d0

+19 -6
+14
fs/cachefiles/io.c
··· 648 648 struct netfs_cache_resources *cres = &wreq->cache_resources; 649 649 struct cachefiles_object *object = cachefiles_cres_object(cres); 650 650 struct cachefiles_cache *cache = object->volume->cache; 651 + struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; 651 652 const struct cred *saved_cred; 652 653 size_t off, pre, post, len = subreq->len; 653 654 loff_t start = subreq->start; ··· 662 661 if (off) { 663 662 pre = CACHEFILES_DIO_BLOCK_SIZE - off; 664 663 if (pre >= len) { 664 + fscache_count_dio_misfit(); 665 665 netfs_write_subrequest_terminated(subreq, len, false); 666 666 return; 667 667 } ··· 673 671 } 674 672 675 673 /* We also need to end on the cache granularity boundary */ 674 + if (start + len == wreq->i_size) { 675 + size_t part = len % CACHEFILES_DIO_BLOCK_SIZE; 676 + size_t need = CACHEFILES_DIO_BLOCK_SIZE - part; 677 + 678 + if (part && stream->submit_extendable_to >= need) { 679 + len += need; 680 + subreq->len += need; 681 + subreq->io_iter.count += need; 682 + } 683 + } 684 + 676 685 post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1); 677 686 if (post) { 678 687 len -= post; 679 688 if (len == 0) { 689 + fscache_count_dio_misfit(); 680 690 netfs_write_subrequest_terminated(subreq, post, false); 681 691 return; 682 692 }
+2 -2
fs/netfs/read_pgpriv2.c
··· 97 97 if (netfs_buffer_append_folio(wreq, folio, false) < 0) 98 98 return -ENOMEM; 99 99 100 - cache->submit_max_len = fsize; 100 + cache->submit_extendable_to = fsize; 101 101 cache->submit_off = 0; 102 102 cache->submit_len = flen; 103 103 ··· 112 112 wreq->io_iter.iov_offset = cache->submit_off; 113 113 114 114 atomic64_set(&wreq->issued_to, fpos + cache->submit_off); 115 + cache->submit_extendable_to = fsize - cache->submit_off; 115 116 part = netfs_advance_write(wreq, cache, fpos + cache->submit_off, 116 117 cache->submit_len, to_eof); 117 118 cache->submit_off += part; 118 - cache->submit_max_len -= part; 119 119 if (part > cache->submit_len) 120 120 cache->submit_len = 0; 121 121 else
+2 -3
fs/netfs/write_issue.c
··· 283 283 _debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len); 284 284 subreq->len += part; 285 285 subreq->nr_segs++; 286 + stream->submit_extendable_to -= part; 286 287 287 288 if (subreq->len >= stream->sreq_max_len || 288 289 subreq->nr_segs >= stream->sreq_max_segs || ··· 425 424 */ 426 425 for (int s = 0; s < NR_IO_STREAMS; s++) { 427 426 stream = &wreq->io_streams[s]; 428 - stream->submit_max_len = fsize; 429 427 stream->submit_off = foff; 430 428 stream->submit_len = flen; 431 429 if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) || ··· 432 432 fgroup == NETFS_FOLIO_COPY_TO_CACHE)) { 433 433 stream->submit_off = UINT_MAX; 434 434 stream->submit_len = 0; 435 - stream->submit_max_len = 0; 436 435 } 437 436 } 438 437 ··· 461 462 wreq->io_iter.iov_offset = stream->submit_off; 462 463 463 464 atomic64_set(&wreq->issued_to, fpos + stream->submit_off); 465 + stream->submit_extendable_to = fsize - stream->submit_off; 464 466 part = netfs_advance_write(wreq, stream, fpos + stream->submit_off, 465 467 stream->submit_len, to_eof); 466 468 stream->submit_off += part; 467 - stream->submit_max_len -= part; 468 469 if (part > stream->submit_len) 469 470 stream->submit_len = 0; 470 471 else
+1 -1
include/linux/netfs.h
··· 135 135 unsigned int sreq_max_segs; /* 0 or max number of segments in an iterator */ 136 136 unsigned int submit_off; /* Folio offset we're submitting from */ 137 137 unsigned int submit_len; /* Amount of data left to submit */ 138 - unsigned int submit_max_len; /* Amount I/O can be rounded up to */ 138 + unsigned int submit_extendable_to; /* Amount I/O can be rounded up to */ 139 139 void (*prepare_write)(struct netfs_io_subrequest *subreq); 140 140 void (*issue_write)(struct netfs_io_subrequest *subreq); 141 141 /* Collection tracking */