Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'vfs-6.8.netfs' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs

Pull netfs updates from Christian Brauner:
"This extends the netfs helper library that network filesystems can use
to replace their own implementations. Both afs and 9p are ported. cifs
is ready as well but the patches are way bigger and will be routed
separately once this is merged. That will remove lots of code as well.

The overall goal is to get high-level I/O and knowledge of the page
cache out of the filesystem drivers. This includes knowledge about
the existence of pages and folios.

The pull request converts afs and 9p. This removes about 800 lines of
code from afs and 300 from 9p. For 9p it is now possible to do writes
in chunks larger than a page. Additionally, multipage folio support
can be turned on for 9p. Separate patches exist for cifs removing
another 2000+ lines. I've included detailed information in the
individual pulls I took.

Summary:

- Add NFS-style (and Ceph-style) locking around DIO vs buffered I/O
calls to prevent these from happening at the same time.

- Support for direct and unbuffered I/O.

- Support for write-through caching in the page cache.

- O_*SYNC and RWF_*SYNC writes use write-through rather than writing
to the page cache and then flushing afterwards.

- Support for write-streaming.

- Support for write grouping.

- Skip reads for which the server could only return zeros or EOF.

- The fscache module is now part of the netfs library and the
corresponding maintainer entry is updated.

- Some helpers from the fscache subsystem are renamed to mark them as
belonging to the netfs library.

- Follow-up fixes for the netfs library.

- Follow-up fixes for the 9p conversion"

* tag 'vfs-6.8.netfs' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs: (50 commits)
netfs: Fix wrong #ifdef hiding wait
cachefiles: Fix signed/unsigned mixup
netfs: Fix the loop that unmarks folios after writing to the cache
netfs: Fix interaction between write-streaming and cachefiles culling
netfs: Count DIO writes
netfs: Mark netfs_unbuffered_write_iter_locked() static
netfs: Fix proc/fs/fscache symlink to point to "netfs" not "../netfs"
netfs: Rearrange netfs_io_subrequest to put request pointer first
9p: Use length of data written to the server in preference to error
9p: Do a couple of cleanups
9p: Fix initialisation of netfs_inode for 9p
cachefiles: Fix __cachefiles_prepare_write()
9p: Use netfslib read/write_iter
afs: Use the netfs write helpers
netfs: Export the netfs_sreq tracepoint
netfs: Optimise away reads above the point at which there can be no data
netfs: Implement a write-through caching option
netfs: Provide a launder_folio implementation
netfs: Provide a writepages implementation
netfs, cachefiles: Pass upper bound length to allow expansion
...

+4164 -2186
+4 -19
Documentation/filesystems/netfs_library.rst
···
 struct netfs_request_ops {
 	void (*init_request)(struct netfs_io_request *rreq, struct file *file);
 	void (*free_request)(struct netfs_io_request *rreq);
-	int (*begin_cache_operation)(struct netfs_io_request *rreq);
 	void (*expand_readahead)(struct netfs_io_request *rreq);
 	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
 	void (*issue_read)(struct netfs_io_subrequest *subreq);
···
 
   [Optional] This is called as the request is being deallocated so that the
   filesystem can clean up any state it has attached there.
-
- * ``begin_cache_operation()``
-
-   [Optional] This is called to ask the network filesystem to call into the
-   cache (if present) to initialise the caching state for this read.  The netfs
-   library module cannot access the cache directly, so the cache should call
-   something like fscache_begin_read_operation() to do this.
-
-   The cache gets to store its state in ->cache_resources and must set a table
-   of operations of its own there (though of a different type).
-
-   This should return 0 on success and an error code otherwise.  If an error is
-   reported, the operation may proceed anyway, just without local caching (only
-   out of memory and interruption errors cause failure here).
 
  * ``expand_readahead()``
 
···
 required: some way for the network filesystem to initialise the caching for a
 read request and a table of operations for the helpers to call.
 
-The network filesystem's ->begin_cache_operation() method is called to set up a
-cache and this must call into the cache to do the work.  If using fscache, for
-example, the cache would call::
+To begin a cache operation on an fscache object, the following function is
+called::
 
 	int fscache_begin_read_operation(struct netfs_io_request *rreq,
 					 struct fscache_cookie *cookie);
 
-passing in the request pointer and the cookie corresponding to the file.
+passing in the request pointer and the cookie corresponding to the file.  This
+fills in the cache resources mentioned below.
 
 The netfs_io_request object contains a place for the cache to hang its
 state::
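
The documentation change above says the cache stores its state in ->cache_resources and installs a table of its own operations there. A minimal userspace model of that arrangement follows; every name in it (struct cache_ops, begin_read_operation, and so on) is an illustrative stand-in, not the kernel's type or API:

```c
#include <stddef.h>

/* Toy model of the pattern described in the documentation hunk above:
 * a request carries a cache_resources slot, and "beginning" a cache
 * operation means the cache filling that slot with its state and an
 * ops table of its own.  All names here are hypothetical. */

struct cache_ops {
	int (*read)(void *state);		/* cache-side read hook */
};

struct cache_resources {
	const struct cache_ops *ops;		/* set by the cache */
	void *state;				/* cache-private state */
};

struct io_request {
	struct cache_resources cache_resources;
};

static int demo_read(void *state)
{
	(void)state;
	return 0;				/* pretend the read hit */
}

static const struct cache_ops demo_cache_ops = {
	.read = demo_read,
};

/* Stand-in for fscache_begin_read_operation(): attach the cache's ops
 * table and state to the request.  Returns 0 on success. */
static int begin_read_operation(struct io_request *rreq)
{
	rreq->cache_resources.ops = &demo_cache_ops;
	rreq->cache_resources.state = NULL;
	return 0;
}
```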
+13 -8
MAINTAINERS
···
 F:	fs/iomap/
 F:	include/linux/iomap.h
 
+FILESYSTEMS [NETFS LIBRARY]
+M:	David Howells <dhowells@redhat.com>
+L:	linux-cachefs@redhat.com (moderated for non-subscribers)
+L:	linux-fsdevel@vger.kernel.org
+S:	Supported
+F:	Documentation/filesystems/caching/
+F:	Documentation/filesystems/netfs_library.rst
+F:	fs/netfs/
+F:	include/linux/fscache*.h
+F:	include/linux/netfs.h
+F:	include/trace/events/fscache.h
+F:	include/trace/events/netfs.h
+
 FILESYSTEMS [STACKABLE]
 M:	Miklos Szeredi <miklos@szeredi.hu>
 M:	Amir Goldstein <amir73il@gmail.com>
···
 F:	Documentation/power/freezing-of-tasks.rst
 F:	include/linux/freezer.h
 F:	kernel/freezer.c
-
-FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
-M:	David Howells <dhowells@redhat.com>
-L:	linux-cachefs@redhat.com (moderated for non-subscribers)
-S:	Supported
-F:	Documentation/filesystems/caching/
-F:	fs/fscache/
-F:	include/linux/fscache*.h
 
 FSCRYPT: FILE SYSTEM LEVEL ENCRYPTION SUPPORT
 M:	Eric Biggers <ebiggers@kernel.org>
+2 -1
arch/arm/configs/mxs_defconfig
···
 CONFIG_NVMEM_MXS_OCOTP=y
 CONFIG_EXT4_FS=y
 # CONFIG_DNOTIFY is not set
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_FSCACHE_STATS=y
 CONFIG_CACHEFILES=m
 CONFIG_VFAT_FS=y
+2 -1
arch/csky/configs/defconfig
···
 CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA=y
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_FSCACHE_STATS=y
 CONFIG_CACHEFILES=m
 CONFIG_MSDOS_FS=y
+2 -1
arch/mips/configs/ip27_defconfig
···
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_FSCACHE_STATS=y
 CONFIG_CACHEFILES=m
 CONFIG_PROC_KCORE=y
+2 -1
arch/mips/configs/lemote2f_defconfig
···
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
+2 -1
arch/mips/configs/loongson3_defconfig
···
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_VIRTIO_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_MSDOS_FS=m
+2 -1
arch/mips/configs/pic32mzda_defconfig
···
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
+2 -1
arch/s390/configs/debug_defconfig
···
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
 CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
+2 -1
arch/s390/configs/defconfig
···
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
 CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
+2 -1
arch/sh/configs/sdk7786_defconfig
···
 CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
+1
fs/9p/v9fs_vfs.h
···
 void v9fs_free_inode(struct inode *inode);
 struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode,
 			     dev_t rdev);
+void v9fs_set_netfs_context(struct inode *inode);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
 		    struct inode *inode, umode_t mode, dev_t rdev);
 void v9fs_evict_inode(struct inode *inode);
+68 -283
fs/9p/vfs_addr.c
··· 19 19 #include <linux/netfs.h> 20 20 #include <net/9p/9p.h> 21 21 #include <net/9p/client.h> 22 + #include <trace/events/netfs.h> 22 23 23 24 #include "v9fs.h" 24 25 #include "v9fs_vfs.h" 25 26 #include "cache.h" 26 27 #include "fid.h" 28 + 29 + static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq) 30 + { 31 + struct p9_fid *fid = subreq->rreq->netfs_priv; 32 + int err, len; 33 + 34 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 35 + len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); 36 + netfs_write_subrequest_terminated(subreq, len ?: err, false); 37 + } 38 + 39 + static void v9fs_upload_to_server_worker(struct work_struct *work) 40 + { 41 + struct netfs_io_subrequest *subreq = 42 + container_of(work, struct netfs_io_subrequest, work); 43 + 44 + v9fs_upload_to_server(subreq); 45 + } 46 + 47 + /* 48 + * Set up write requests for a writeback slice. We need to add a write request 49 + * for each write we want to make. 50 + */ 51 + static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) 52 + { 53 + struct netfs_io_subrequest *subreq; 54 + 55 + subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, 56 + start, len, v9fs_upload_to_server_worker); 57 + if (subreq) 58 + netfs_queue_write_request(subreq); 59 + } 27 60 28 61 /** 29 62 * v9fs_issue_read - Issue a read from 9P ··· 66 33 { 67 34 struct netfs_io_request *rreq = subreq->rreq; 68 35 struct p9_fid *fid = rreq->netfs_priv; 69 - struct iov_iter to; 70 - loff_t pos = subreq->start + subreq->transferred; 71 - size_t len = subreq->len - subreq->transferred; 72 36 int total, err; 73 37 74 - iov_iter_xarray(&to, ITER_DEST, &rreq->mapping->i_pages, pos, len); 75 - 76 - total = p9_client_read(fid, pos, &to, &err); 38 + total = p9_client_read(fid, subreq->start + subreq->transferred, 39 + &subreq->io_iter, &err); 77 40 78 41 /* if we just extended the file size, any portion not in 79 42 * cache won't be on server and is zeroes */ 
··· 79 50 } 80 51 81 52 /** 82 - * v9fs_init_request - Initialise a read request 53 + * v9fs_init_request - Initialise a request 83 54 * @rreq: The read request 84 55 * @file: The file being read from 85 56 */ 86 57 static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file) 87 58 { 88 - struct p9_fid *fid = file->private_data; 59 + struct p9_fid *fid; 60 + bool writing = (rreq->origin == NETFS_READ_FOR_WRITE || 61 + rreq->origin == NETFS_WRITEBACK || 62 + rreq->origin == NETFS_WRITETHROUGH || 63 + rreq->origin == NETFS_LAUNDER_WRITE || 64 + rreq->origin == NETFS_UNBUFFERED_WRITE || 65 + rreq->origin == NETFS_DIO_WRITE); 89 66 90 - BUG_ON(!fid); 67 + if (file) { 68 + fid = file->private_data; 69 + if (!fid) 70 + goto no_fid; 71 + p9_fid_get(fid); 72 + } else { 73 + fid = v9fs_fid_find_inode(rreq->inode, writing, INVALID_UID, true); 74 + if (!fid) 75 + goto no_fid; 76 + } 91 77 92 78 /* we might need to read from a fid that was opened write-only 93 79 * for read-modify-write of page cache, use the writeback fid 94 80 * for that */ 95 - WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && 96 - !(fid->mode & P9_ORDWR)); 97 - 98 - p9_fid_get(fid); 81 + WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && !(fid->mode & P9_ORDWR)); 99 82 rreq->netfs_priv = fid; 100 83 return 0; 84 + 85 + no_fid: 86 + WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n", 87 + rreq->inode->i_ino); 88 + return -EINVAL; 101 89 } 102 90 103 91 /** ··· 128 82 p9_fid_put(fid); 129 83 } 130 84 131 - /** 132 - * v9fs_begin_cache_operation - Begin a cache operation for a read 133 - * @rreq: The read request 134 - */ 135 - static int v9fs_begin_cache_operation(struct netfs_io_request *rreq) 136 - { 137 - #ifdef CONFIG_9P_FSCACHE 138 - struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(rreq->inode)); 139 - 140 - return fscache_begin_read_operation(&rreq->cache_resources, cookie); 141 - #else 142 - return -ENOBUFS; 143 - #endif 144 - } 145 - 146 85 const struct 
netfs_request_ops v9fs_req_ops = { 147 86 .init_request = v9fs_init_request, 148 87 .free_request = v9fs_free_request, 149 - .begin_cache_operation = v9fs_begin_cache_operation, 150 88 .issue_read = v9fs_issue_read, 89 + .create_write_requests = v9fs_create_write_requests, 151 90 }; 152 91 153 - /** 154 - * v9fs_release_folio - release the private state associated with a folio 155 - * @folio: The folio to be released 156 - * @gfp: The caller's allocation restrictions 157 - * 158 - * Returns true if the page can be released, false otherwise. 159 - */ 160 - 161 - static bool v9fs_release_folio(struct folio *folio, gfp_t gfp) 162 - { 163 - if (folio_test_private(folio)) 164 - return false; 165 - #ifdef CONFIG_9P_FSCACHE 166 - if (folio_test_fscache(folio)) { 167 - if (current_is_kswapd() || !(gfp & __GFP_FS)) 168 - return false; 169 - folio_wait_fscache(folio); 170 - } 171 - fscache_note_page_release(v9fs_inode_cookie(V9FS_I(folio_inode(folio)))); 172 - #endif 173 - return true; 174 - } 175 - 176 - static void v9fs_invalidate_folio(struct folio *folio, size_t offset, 177 - size_t length) 178 - { 179 - folio_wait_fscache(folio); 180 - } 181 - 182 - #ifdef CONFIG_9P_FSCACHE 183 - static void v9fs_write_to_cache_done(void *priv, ssize_t transferred_or_error, 184 - bool was_async) 185 - { 186 - struct v9fs_inode *v9inode = priv; 187 - __le32 version; 188 - 189 - if (IS_ERR_VALUE(transferred_or_error) && 190 - transferred_or_error != -ENOBUFS) { 191 - version = cpu_to_le32(v9inode->qid.version); 192 - fscache_invalidate(v9fs_inode_cookie(v9inode), &version, 193 - i_size_read(&v9inode->netfs.inode), 0); 194 - } 195 - } 196 - #endif 197 - 198 - static int v9fs_vfs_write_folio_locked(struct folio *folio) 199 - { 200 - struct inode *inode = folio_inode(folio); 201 - loff_t start = folio_pos(folio); 202 - loff_t i_size = i_size_read(inode); 203 - struct iov_iter from; 204 - size_t len = folio_size(folio); 205 - struct p9_fid *writeback_fid; 206 - int err; 207 - struct 
v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); 208 - struct fscache_cookie __maybe_unused *cookie = v9fs_inode_cookie(v9inode); 209 - 210 - if (start >= i_size) 211 - return 0; /* Simultaneous truncation occurred */ 212 - 213 - len = min_t(loff_t, i_size - start, len); 214 - 215 - iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len); 216 - 217 - writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true); 218 - if (!writeback_fid) { 219 - WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n", 220 - inode->i_private); 221 - return -EINVAL; 222 - } 223 - 224 - folio_wait_fscache(folio); 225 - folio_start_writeback(folio); 226 - 227 - p9_client_write(writeback_fid, start, &from, &err); 228 - 229 - #ifdef CONFIG_9P_FSCACHE 230 - if (err == 0 && 231 - fscache_cookie_enabled(cookie) && 232 - test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags)) { 233 - folio_start_fscache(folio); 234 - fscache_write_to_cache(v9fs_inode_cookie(v9inode), 235 - folio_mapping(folio), start, len, i_size, 236 - v9fs_write_to_cache_done, v9inode, 237 - true); 238 - } 239 - #endif 240 - 241 - folio_end_writeback(folio); 242 - p9_fid_put(writeback_fid); 243 - 244 - return err; 245 - } 246 - 247 - static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) 248 - { 249 - struct folio *folio = page_folio(page); 250 - int retval; 251 - 252 - p9_debug(P9_DEBUG_VFS, "folio %p\n", folio); 253 - 254 - retval = v9fs_vfs_write_folio_locked(folio); 255 - if (retval < 0) { 256 - if (retval == -EAGAIN) { 257 - folio_redirty_for_writepage(wbc, folio); 258 - retval = 0; 259 - } else { 260 - mapping_set_error(folio_mapping(folio), retval); 261 - } 262 - } else 263 - retval = 0; 264 - 265 - folio_unlock(folio); 266 - return retval; 267 - } 268 - 269 - static int v9fs_launder_folio(struct folio *folio) 270 - { 271 - int retval; 272 - 273 - if (folio_clear_dirty_for_io(folio)) { 274 - retval = v9fs_vfs_write_folio_locked(folio); 275 - if (retval) 
276 - return retval; 277 - } 278 - folio_wait_fscache(folio); 279 - return 0; 280 - } 281 - 282 - /** 283 - * v9fs_direct_IO - 9P address space operation for direct I/O 284 - * @iocb: target I/O control block 285 - * @iter: The data/buffer to use 286 - * 287 - * The presence of v9fs_direct_IO() in the address space ops vector 288 - * allowes open() O_DIRECT flags which would have failed otherwise. 289 - * 290 - * In the non-cached mode, we shunt off direct read and write requests before 291 - * the VFS gets them, so this method should never be called. 292 - * 293 - * Direct IO is not 'yet' supported in the cached mode. Hence when 294 - * this routine is called through generic_file_aio_read(), the read/write fails 295 - * with an error. 296 - * 297 - */ 298 - static ssize_t 299 - v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 300 - { 301 - struct file *file = iocb->ki_filp; 302 - loff_t pos = iocb->ki_pos; 303 - ssize_t n; 304 - int err = 0; 305 - 306 - if (iov_iter_rw(iter) == WRITE) { 307 - n = p9_client_write(file->private_data, pos, iter, &err); 308 - if (n) { 309 - struct inode *inode = file_inode(file); 310 - loff_t i_size = i_size_read(inode); 311 - 312 - if (pos + n > i_size) 313 - inode_add_bytes(inode, pos + n - i_size); 314 - } 315 - } else { 316 - n = p9_client_read(file->private_data, pos, iter, &err); 317 - } 318 - return n ? n : err; 319 - } 320 - 321 - static int v9fs_write_begin(struct file *filp, struct address_space *mapping, 322 - loff_t pos, unsigned int len, 323 - struct page **subpagep, void **fsdata) 324 - { 325 - int retval; 326 - struct folio *folio; 327 - struct v9fs_inode *v9inode = V9FS_I(mapping->host); 328 - 329 - p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); 330 - 331 - /* Prefetch area to be written into the cache if we're caching this 332 - * file. We need to do this before we get a lock on the page in case 333 - * there's more than one writer competing for the same cache block. 
334 - */ 335 - retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata); 336 - if (retval < 0) 337 - return retval; 338 - 339 - *subpagep = &folio->page; 340 - return retval; 341 - } 342 - 343 - static int v9fs_write_end(struct file *filp, struct address_space *mapping, 344 - loff_t pos, unsigned int len, unsigned int copied, 345 - struct page *subpage, void *fsdata) 346 - { 347 - loff_t last_pos = pos + copied; 348 - struct folio *folio = page_folio(subpage); 349 - struct inode *inode = mapping->host; 350 - 351 - p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); 352 - 353 - if (!folio_test_uptodate(folio)) { 354 - if (unlikely(copied < len)) { 355 - copied = 0; 356 - goto out; 357 - } 358 - 359 - folio_mark_uptodate(folio); 360 - } 361 - 362 - /* 363 - * No need to use i_size_read() here, the i_size 364 - * cannot change under us because we hold the i_mutex. 365 - */ 366 - if (last_pos > inode->i_size) { 367 - inode_add_bytes(inode, last_pos - inode->i_size); 368 - i_size_write(inode, last_pos); 369 - #ifdef CONFIG_9P_FSCACHE 370 - fscache_update_cookie(v9fs_inode_cookie(V9FS_I(inode)), NULL, 371 - &last_pos); 372 - #endif 373 - } 374 - folio_mark_dirty(folio); 375 - out: 376 - folio_unlock(folio); 377 - folio_put(folio); 378 - 379 - return copied; 380 - } 381 - 382 - #ifdef CONFIG_9P_FSCACHE 383 - /* 384 - * Mark a page as having been made dirty and thus needing writeback. We also 385 - * need to pin the cache object to write back to. 
386 - */ 387 - static bool v9fs_dirty_folio(struct address_space *mapping, struct folio *folio) 388 - { 389 - struct v9fs_inode *v9inode = V9FS_I(mapping->host); 390 - 391 - return fscache_dirty_folio(mapping, folio, v9fs_inode_cookie(v9inode)); 392 - } 393 - #else 394 - #define v9fs_dirty_folio filemap_dirty_folio 395 - #endif 396 - 397 92 const struct address_space_operations v9fs_addr_operations = { 398 - .read_folio = netfs_read_folio, 399 - .readahead = netfs_readahead, 400 - .dirty_folio = v9fs_dirty_folio, 401 - .writepage = v9fs_vfs_writepage, 402 - .write_begin = v9fs_write_begin, 403 - .write_end = v9fs_write_end, 404 - .release_folio = v9fs_release_folio, 405 - .invalidate_folio = v9fs_invalidate_folio, 406 - .launder_folio = v9fs_launder_folio, 407 - .direct_IO = v9fs_direct_IO, 93 + .read_folio = netfs_read_folio, 94 + .readahead = netfs_readahead, 95 + .dirty_folio = netfs_dirty_folio, 96 + .release_folio = netfs_release_folio, 97 + .invalidate_folio = netfs_invalidate_folio, 98 + .launder_folio = netfs_launder_folio, 99 + .direct_IO = noop_direct_IO, 100 + .writepages = netfs_writepages, 408 101 };
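
The new v9fs_init_request() in the hunk above derives a `writing` flag from the request origin to decide whether the fid it looks up must be opened for writing. A compilable toy of that predicate (the origin names come from the diff; the enum values themselves are illustrative, not the kernel's):

```c
#include <stdbool.h>

/* Origins named in the v9fs_init_request() hunk above.  Values are
 * made up for this sketch. */
enum netfs_io_origin {
	NETFS_READAHEAD,
	NETFS_READPAGE,
	NETFS_READ_FOR_WRITE,
	NETFS_WRITEBACK,
	NETFS_WRITETHROUGH,
	NETFS_LAUNDER_WRITE,
	NETFS_UNBUFFERED_WRITE,
	NETFS_DIO_READ,
	NETFS_DIO_WRITE,
};

/* Mirror of the `writing` test: true for every origin that will send
 * data to the server, so the fid must be opened writable. */
static bool origin_is_writing(enum netfs_io_origin origin)
{
	switch (origin) {
	case NETFS_READ_FOR_WRITE:
	case NETFS_WRITEBACK:
	case NETFS_WRITETHROUGH:
	case NETFS_LAUNDER_WRITE:
	case NETFS_UNBUFFERED_WRITE:
	case NETFS_DIO_WRITE:
		return true;
	default:
		return false;
	}
}
```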
+9 -80
fs/9p/vfs_file.c
··· 353 353 v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 354 354 { 355 355 struct p9_fid *fid = iocb->ki_filp->private_data; 356 - int ret, err = 0; 357 356 358 357 p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n", 359 358 fid->fid, iov_iter_count(to), iocb->ki_pos); 360 359 361 - if (!(fid->mode & P9L_DIRECT)) { 362 - p9_debug(P9_DEBUG_VFS, "(cached)\n"); 363 - return generic_file_read_iter(iocb, to); 364 - } 360 + if (fid->mode & P9L_DIRECT) 361 + return netfs_unbuffered_read_iter(iocb, to); 365 362 366 - if (iocb->ki_filp->f_flags & O_NONBLOCK) 367 - ret = p9_client_read_once(fid, iocb->ki_pos, to, &err); 368 - else 369 - ret = p9_client_read(fid, iocb->ki_pos, to, &err); 370 - if (!ret) 371 - return err; 372 - 373 - iocb->ki_pos += ret; 374 - return ret; 363 + p9_debug(P9_DEBUG_VFS, "(cached)\n"); 364 + return netfs_file_read_iter(iocb, to); 375 365 } 376 366 377 367 /* ··· 397 407 { 398 408 struct file *file = iocb->ki_filp; 399 409 struct p9_fid *fid = file->private_data; 400 - ssize_t retval; 401 - loff_t origin; 402 - int err = 0; 403 410 404 411 p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid); 405 412 406 - if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) { 407 - p9_debug(P9_DEBUG_CACHE, "(cached)\n"); 408 - return generic_file_write_iter(iocb, from); 409 - } 413 + if (fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE)) 414 + return netfs_unbuffered_write_iter(iocb, from); 410 415 411 - retval = generic_write_checks(iocb, from); 412 - if (retval <= 0) 413 - return retval; 414 - 415 - origin = iocb->ki_pos; 416 - retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err); 417 - if (retval > 0) { 418 - struct inode *inode = file_inode(file); 419 - loff_t i_size; 420 - unsigned long pg_start, pg_end; 421 - 422 - pg_start = origin >> PAGE_SHIFT; 423 - pg_end = (origin + retval - 1) >> PAGE_SHIFT; 424 - if (inode->i_mapping && inode->i_mapping->nrpages) 425 - invalidate_inode_pages2_range(inode->i_mapping, 426 - pg_start, pg_end); 
427 - iocb->ki_pos += retval; 428 - i_size = i_size_read(inode); 429 - if (iocb->ki_pos > i_size) { 430 - inode_add_bytes(inode, iocb->ki_pos - i_size); 431 - /* 432 - * Need to serialize against i_size_write() in 433 - * v9fs_stat2inode() 434 - */ 435 - v9fs_i_size_write(inode, iocb->ki_pos); 436 - } 437 - return retval; 438 - } 439 - return err; 416 + p9_debug(P9_DEBUG_CACHE, "(cached)\n"); 417 + return netfs_file_write_iter(iocb, from); 440 418 } 441 419 442 420 static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, ··· 477 519 static vm_fault_t 478 520 v9fs_vm_page_mkwrite(struct vm_fault *vmf) 479 521 { 480 - struct folio *folio = page_folio(vmf->page); 481 - struct file *filp = vmf->vma->vm_file; 482 - struct inode *inode = file_inode(filp); 483 - 484 - 485 - p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n", 486 - folio, (unsigned long)filp->private_data); 487 - 488 - /* Wait for the page to be written to the cache before we allow it to 489 - * be modified. We then assume the entire page will need writing back. 490 - */ 491 - #ifdef CONFIG_9P_FSCACHE 492 - if (folio_test_fscache(folio) && 493 - folio_wait_fscache_killable(folio) < 0) 494 - return VM_FAULT_NOPAGE; 495 - #endif 496 - 497 - /* Update file times before taking page lock */ 498 - file_update_time(filp); 499 - 500 - if (folio_lock_killable(folio) < 0) 501 - return VM_FAULT_RETRY; 502 - if (folio_mapping(folio) != inode->i_mapping) 503 - goto out_unlock; 504 - folio_wait_stable(folio); 505 - 506 - return VM_FAULT_LOCKED; 507 - out_unlock: 508 - folio_unlock(folio); 509 - return VM_FAULT_NOPAGE; 522 + return netfs_page_mkwrite(vmf, NULL); 510 523 } 511 524 512 525 static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
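
The rewritten read and write iterators above reduce to a flag test: P9L_DIRECT (and, for writes, P9L_NOWRITECACHE) selects the unbuffered netfs path, everything else takes the buffered one. A userspace sketch of that dispatch (the flag bit values here are invented for illustration; only the flag names appear in the diff):

```c
/* Illustrative bit values; the real P9L_* flags live in fid->mode. */
#define P9L_DIRECT		0x1
#define P9L_NOWRITECACHE	0x2

enum io_path { IO_UNBUFFERED, IO_BUFFERED };

/* Reads bypass the page cache only for direct-I/O opens
 * (cf. v9fs_file_read_iter() above). */
static enum io_path pick_read_path(unsigned int mode)
{
	return (mode & P9L_DIRECT) ? IO_UNBUFFERED : IO_BUFFERED;
}

/* Writes additionally skip the cache when write-caching is disabled
 * (cf. v9fs_file_write_iter() above). */
static enum io_path pick_write_path(unsigned int mode)
{
	return (mode & (P9L_DIRECT | P9L_NOWRITECACHE)) ?
		IO_UNBUFFERED : IO_BUFFERED;
}
```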
+7 -9
fs/9p/vfs_inode.c
··· 246 246 /* 247 247 * Set parameters for the netfs library 248 248 */ 249 - static void v9fs_set_netfs_context(struct inode *inode) 249 + void v9fs_set_netfs_context(struct inode *inode) 250 250 { 251 251 struct v9fs_inode *v9inode = V9FS_I(inode); 252 - netfs_inode_init(&v9inode->netfs, &v9fs_req_ops); 252 + netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true); 253 253 } 254 254 255 255 int v9fs_init_inode(struct v9fs_session_info *v9ses, ··· 326 326 err = -EINVAL; 327 327 goto error; 328 328 } 329 - 330 - v9fs_set_netfs_context(inode); 331 329 error: 332 330 return err; 333 331 ··· 357 359 iput(inode); 358 360 return ERR_PTR(err); 359 361 } 362 + v9fs_set_netfs_context(inode); 360 363 return inode; 361 364 } 362 365 ··· 373 374 374 375 truncate_inode_pages_final(&inode->i_data); 375 376 376 - #ifdef CONFIG_9P_FSCACHE 377 377 version = cpu_to_le32(v9inode->qid.version); 378 - fscache_clear_inode_writeback(v9fs_inode_cookie(v9inode), inode, 379 - &version); 380 - #endif 378 + netfs_clear_inode_writeback(inode, &version); 381 379 382 380 clear_inode(inode); 383 381 filemap_fdatawrite(&inode->i_data); ··· 460 464 goto error; 461 465 462 466 v9fs_stat2inode(st, inode, sb, 0); 467 + v9fs_set_netfs_context(inode); 463 468 v9fs_cache_inode_get_cookie(inode); 464 469 unlock_new_inode(inode); 465 470 return inode; ··· 1110 1113 if ((iattr->ia_valid & ATTR_SIZE) && 1111 1114 iattr->ia_size != i_size_read(inode)) { 1112 1115 truncate_setsize(inode, iattr->ia_size); 1113 - truncate_pagecache(inode, iattr->ia_size); 1116 + netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); 1114 1117 1115 1118 #ifdef CONFIG_9P_FSCACHE 1116 1119 if (v9ses->cache & CACHE_FSCACHE) { ··· 1178 1181 mode |= inode->i_mode & ~S_IALLUGO; 1179 1182 inode->i_mode = mode; 1180 1183 1184 + v9inode->netfs.remote_i_size = stat->length; 1181 1185 if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) 1182 1186 v9fs_i_size_write(inode, stat->length); 1183 1187 /* not real number of blocks, but 512 byte ones 
... */
+6 -2
fs/9p/vfs_inode_dotl.c
··· 128 128 goto error; 129 129 130 130 v9fs_stat2inode_dotl(st, inode, 0); 131 + v9fs_set_netfs_context(inode); 131 132 v9fs_cache_inode_get_cookie(inode); 132 133 retval = v9fs_get_acl(inode, fid); 133 134 if (retval) ··· 599 598 if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != 600 599 i_size_read(inode)) { 601 600 truncate_setsize(inode, iattr->ia_size); 602 - truncate_pagecache(inode, iattr->ia_size); 601 + netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); 603 602 604 603 #ifdef CONFIG_9P_FSCACHE 605 604 if (v9ses->cache & CACHE_FSCACHE) ··· 656 655 mode |= inode->i_mode & ~S_IALLUGO; 657 656 inode->i_mode = mode; 658 657 658 + v9inode->netfs.remote_i_size = stat->st_size; 659 659 if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) 660 660 v9fs_i_size_write(inode, stat->st_size); 661 661 inode->i_blocks = stat->st_blocks; ··· 685 683 inode->i_mode = mode; 686 684 } 687 685 if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && 688 - stat->st_result_mask & P9_STATS_SIZE) 686 + stat->st_result_mask & P9_STATS_SIZE) { 687 + v9inode->netfs.remote_i_size = stat->st_size; 689 688 v9fs_i_size_write(inode, stat->st_size); 689 + } 690 690 if (stat->st_result_mask & P9_STATS_BLOCKS) 691 691 inode->i_blocks = stat->st_blocks; 692 692 }
+2 -12
fs/9p/vfs_super.c
···
 static int v9fs_write_inode(struct inode *inode,
 			    struct writeback_control *wbc)
 {
-	struct v9fs_inode *v9inode;
-
 	/*
 	 * send an fsync request to server irrespective of
 	 * wbc->sync_mode.
 	 */
 	p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
-
-	v9inode = V9FS_I(inode);
-	fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
-
-	return 0;
+	return netfs_unpin_writeback(inode, wbc);
 }
 
 static int v9fs_write_inode_dotl(struct inode *inode,
 				 struct writeback_control *wbc)
 {
-	struct v9fs_inode *v9inode;
 
-	v9inode = V9FS_I(inode);
 	p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
 
-	fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
-
-	return 0;
+	return netfs_unpin_writeback(inode, wbc);
 }
 
 static const struct super_operations v9fs_super_ops = {
-1
fs/Kconfig
···
 menu "Caches"
 
 source "fs/netfs/Kconfig"
-source "fs/fscache/Kconfig"
 source "fs/cachefiles/Kconfig"
 
 endmenu
-1
fs/Makefile
···
 
 # Do not add any filesystems before this line
 obj-$(CONFIG_NETFS_SUPPORT)	+= netfs/
-obj-$(CONFIG_FSCACHE)		+= fscache/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT4_FS)		+= ext4/
 # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
+1 -1
fs/afs/dynroot.c
···
 	/* there shouldn't be an existing inode */
 	BUG_ON(!(inode->i_state & I_NEW));
 
-	netfs_inode_init(&vnode->netfs, NULL);
+	netfs_inode_init(&vnode->netfs, NULL, false);
 	inode->i_size = 0;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
 	if (root) {
+60 -157
fs/afs/file.c
···
 
 static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
 static int afs_symlink_read_folio(struct file *file, struct folio *folio);
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
-				 size_t length);
-static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags);
 
 static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
 static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
···
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
 	.read_iter	= afs_file_read_iter,
-	.write_iter	= afs_file_write,
+	.write_iter	= netfs_file_write_iter,
 	.mmap		= afs_file_mmap,
 	.splice_read	= afs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
···
 };
 
 const struct address_space_operations afs_file_aops = {
+	.direct_IO	= noop_direct_IO,
 	.read_folio	= netfs_read_folio,
 	.readahead	= netfs_readahead,
-	.dirty_folio	= afs_dirty_folio,
-	.launder_folio	= afs_launder_folio,
-	.release_folio	= afs_release_folio,
-	.invalidate_folio = afs_invalidate_folio,
-	.write_begin	= afs_write_begin,
-	.write_end	= afs_write_end,
-	.writepages	= afs_writepages,
+	.dirty_folio	= netfs_dirty_folio,
+	.launder_folio	= netfs_launder_folio,
+	.release_folio	= netfs_release_folio,
+	.invalidate_folio = netfs_invalidate_folio,
 	.migrate_folio	= filemap_migrate_folio,
+	.writepages	= afs_writepages,
 };
 
 const struct address_space_operations afs_symlink_aops = {
 	.read_folio	= afs_symlink_read_folio,
-	.release_folio	= afs_release_folio,
-	.invalidate_folio = afs_invalidate_folio,
+	.release_folio	= netfs_release_folio,
+	.invalidate_folio = netfs_invalidate_folio,
 	.migrate_folio	= filemap_migrate_folio,
 };
···
 	fsreq->len	= subreq->len - subreq->transferred;
 	fsreq->key	= key_get(subreq->rreq->netfs_priv);
 	fsreq->vnode	= vnode;
-	fsreq->iter	= &fsreq->def_iter;
-
-	iov_iter_xarray(&fsreq->def_iter, ITER_DEST,
-			&fsreq->vnode->netfs.inode.i_mapping->i_pages,
-			fsreq->pos, fsreq->len);
+	fsreq->iter	= &subreq->io_iter;
 
 	afs_fetch_data(fsreq->vnode, fsreq);
 	afs_put_read(fsreq);
···
 
 static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
 {
-	rreq->netfs_priv = key_get(afs_file_key(file));
+	if (file)
+		rreq->netfs_priv = key_get(afs_file_key(file));
+	rreq->rsize = 256 * 1024;
+	rreq->wsize = 256 * 1024;
 	return 0;
-}
-
-static int afs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-#ifdef CONFIG_AFS_FSCACHE
-	struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
-
-	return fscache_begin_read_operation(&rreq->cache_resources,
-					    afs_vnode_cache(vnode));
-#else
-	return -ENOBUFS;
-#endif
 }
 
 static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
···
 	key_put(rreq->netfs_priv);
 }
 
+static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	loff_t i_size;
+
+	write_seqlock(&vnode->cb_lock);
+	i_size = i_size_read(&vnode->netfs.inode);
+	if (new_i_size > i_size) {
+		i_size_write(&vnode->netfs.inode, new_i_size);
+		inode_set_bytes(&vnode->netfs.inode, new_i_size);
+	}
+	write_sequnlock(&vnode->cb_lock);
+	fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
+}
+
+static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
+{
+	struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+
+	afs_invalidate_cache(vnode, 0);
+}
+
 const struct netfs_request_ops afs_req_ops = {
 	.init_request		= afs_init_request,
 	.free_request		= afs_free_request,
-	.begin_cache_operation	= afs_begin_cache_operation,
 	.check_write_begin	= afs_check_write_begin,
 	.issue_read		= afs_issue_read,
+	.update_i_size		= afs_update_i_size,
+	.invalidate_cache	= afs_netfs_invalidate_cache,
+	.create_write_requests	= afs_create_write_requests,
 };
-
-int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode)));
-	return 0;
-}
-
-/*
- * Adjust the dirty region of the page on truncation or full invalidation,
- * getting rid of the markers altogether if the region is entirely invalidated.
- */
-static void afs_invalidate_dirty(struct folio *folio, size_t offset,
-				 size_t length)
-{
-	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-	unsigned long priv;
-	unsigned int f, t, end = offset + length;
-
-	priv = (unsigned long)folio_get_private(folio);
-
-	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == folio_size(folio))
-		goto full_invalidate;
-
-	/* If the page was dirtied by page_mkwrite(), the PTE stays writable
-	 * and we don't get another notification to tell us to expand it
-	 * again.
-	 */
-	if (afs_is_folio_dirty_mmapped(priv))
-		return;
-
-	/* We may need to shorten the dirty region */
-	f = afs_folio_dirty_from(folio, priv);
-	t = afs_folio_dirty_to(folio, priv);
-
-	if (t <= offset || f >= end)
-		return; /* Doesn't overlap */
-
-	if (f < offset && t > end)
-		return; /* Splits the dirty region - just absorb it */
-
-	if (f >= offset && t <= end)
-		goto undirty;
-
-	if (f < offset)
-		t = offset;
-	else
-		f = end;
-	if (f == t)
-		goto undirty;
-
-	priv = afs_folio_dirty(folio, f, t);
-	folio_change_private(folio, (void *)priv);
-	trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
-	return;
-
-undirty:
-	trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
-	folio_clear_dirty_for_io(folio);
-full_invalidate:
-	trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
-	folio_detach_private(folio);
-}
-
-/*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- *   the entire page)
- */
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
-				 size_t length)
-{
-	_enter("{%lu},%zu,%zu", folio->index, offset, length);
-
-	BUG_ON(!folio_test_locked(folio));
-
-	if (folio_get_private(folio))
-		afs_invalidate_dirty(folio, offset, length);
-
-	folio_wait_fscache(folio);
-	_leave("");
-}
-
-/*
- * release a page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
- */
-static bool afs_release_folio(struct folio *folio, gfp_t gfp)
-{
-	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-
-	_enter("{{%llx:%llu}[%lu],%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
-	       gfp);
-
-	/* deny if folio is being written to the cache and the caller hasn't
-	 * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
-	if (folio_test_fscache(folio)) {
-		if (current_is_kswapd() || !(gfp & __GFP_FS))
-			return false;
-		folio_wait_fscache(folio);
-	}
-	fscache_note_page_release(afs_vnode_cache(vnode));
-#endif
-
-	if (folio_test_private(folio)) {
-		trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
-		folio_detach_private(folio);
-	}
-
-	/* Indicate that the folio can be released */
-	_leave(" = T");
-	return true;
-}
 
 static void afs_add_open_mmap(struct afs_vnode *vnode)
 {
···
 
 static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	struct afs_file *af = iocb->ki_filp->private_data;
-	int ret;
+	ssize_t ret;
 
-	ret = afs_validate(vnode, af->key);
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return netfs_unbuffered_read_iter(iocb, iter);
+
+	ret = netfs_start_io_read(inode);
 	if (ret < 0)
 		return ret;
-
-	return generic_file_read_iter(iocb, iter);
+	ret = afs_validate(vnode, af->key);
+	if (ret == 0)
+		ret = filemap_read(iocb, iter, 0);
+	netfs_end_io_read(inode);
+	return ret;
 }
 
 static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
 				    struct pipe_inode_info *pipe,
 				    size_t len, unsigned int flags)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(in));
+	struct inode *inode = file_inode(in);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	struct afs_file *af = in->private_data;
-	int ret;
+	ssize_t ret;
 
-	ret = afs_validate(vnode, af->key);
+	ret = netfs_start_io_read(inode);
 	if (ret < 0)
 		return ret;
-
-	return filemap_splice_read(in, ppos, pipe, len, flags);
+	ret = afs_validate(vnode, af->key);
+	if (ret == 0)
+		ret = filemap_splice_read(in, ppos, pipe, len, flags);
+	netfs_end_io_read(inode);
+	return ret;
 }
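The converted read path above brackets buffered reads with netfs_start_io_read()/netfs_end_io_read() so that they share an inode-level I/O lock among themselves but exclude concurrent direct I/O, which is the NFS/Ceph-style serialisation mentioned in the pull message. As a rough illustration only, here is a standalone userspace C sketch of that exclusion discipline; the struct and function names are invented stand-ins, not the kernel API, and where the kernel would sleep this toy simply reports "busy":

```c
#include <assert.h>
#include <stdbool.h>

/* Toy model of shared-vs-exclusive inode I/O locking: buffered I/O holds the
 * lock shared, direct I/O holds it exclusively.  Not the kernel's
 * implementation; a single-threaded sketch of the state machine. */
struct io_lock {
	int buffered;		/* count of buffered I/O holders (shared) */
	bool direct;		/* direct I/O holds the lock exclusively */
};

static bool start_io_read(struct io_lock *l)
{
	if (l->direct)
		return false;	/* kernel would wait; we just report busy */
	l->buffered++;
	return true;
}

static void end_io_read(struct io_lock *l)
{
	l->buffered--;
}

static bool start_io_direct(struct io_lock *l)
{
	if (l->direct || l->buffered)
		return false;
	l->direct = true;
	return true;
}

static void end_io_direct(struct io_lock *l)
{
	l->direct = false;
}
```

The point of the pattern is that page-cache reads never observe a file mid-DIO and vice versa, without serialising buffered readers against each other.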
+16 -12
fs/afs/inode.c
···
  */
 static void afs_set_netfs_context(struct afs_vnode *vnode)
 {
-	netfs_inode_init(&vnode->netfs, &afs_req_ops);
+	netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
 }
 
 /*
···
 	struct inode *inode = &vnode->netfs.inode;
 	struct timespec64 t;
 	umode_t mode;
+	bool unexpected_jump = false;
 	bool data_changed = false;
 	bool change_size = vp->set_size;
 
···
 		}
 		change_size = true;
 		data_changed = true;
+		unexpected_jump = true;
 	} else if (vnode->status.type == AFS_FTYPE_DIR) {
 		/* Expected directory change is handled elsewhere so
 		 * that we can locally edit the directory and save on a
···
 	 * what's on the server.
 	 */
 	vnode->netfs.remote_i_size = status->size;
-	if (change_size) {
+	if (change_size || status->size > i_size_read(inode)) {
 		afs_set_i_size(vnode, status->size);
+		if (unexpected_jump)
+			vnode->netfs.zero_point = status->size;
 		inode_set_ctime_to_ts(inode, t);
 		inode_set_atime_to_ts(inode, t);
 	}
···
 	truncate_inode_pages_final(&inode->i_data);
 
 	afs_set_cache_aux(vnode, &aux);
-	fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux);
+	netfs_clear_inode_writeback(inode, &aux);
 	clear_inode(inode);
 
 	while (!list_empty(&vnode->wb_keys)) {
···
 static void afs_setattr_edit_file(struct afs_operation *op)
 {
 	struct afs_vnode_param *vp = &op->file[0];
-	struct inode *inode = &vp->vnode->netfs.inode;
+	struct afs_vnode *vnode = vp->vnode;
 
 	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
 		loff_t size = op->setattr.attr->ia_size;
 		loff_t i_size = op->setattr.old_i_size;
 
-		if (size < i_size)
-			truncate_pagecache(inode, size);
-		if (size != i_size)
-			fscache_resize_cookie(afs_vnode_cache(vp->vnode),
-					      vp->scb.status.size);
+		if (size != i_size) {
+			truncate_setsize(&vnode->netfs.inode, size);
+			netfs_resize_file(&vnode->netfs, size, true);
+			fscache_resize_cookie(afs_vnode_cache(vnode), size);
+		}
 	}
 }
···
 	 */
 	if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
 	    attr->ia_size < i_size &&
-	    attr->ia_size > vnode->status.size) {
-		truncate_pagecache(inode, attr->ia_size);
+	    attr->ia_size > vnode->netfs.remote_i_size) {
+		truncate_setsize(inode, attr->ia_size);
+		netfs_resize_file(&vnode->netfs, size, false);
 		fscache_resize_cookie(afs_vnode_cache(vnode),
 				      attr->ia_size);
-		i_size_write(inode, attr->ia_size);
 		ret = 0;
 		goto out_unlock;
 	}
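The inode.c hunks above change how a size reported by the server is folded into the local inode: the local i_size is now also advanced when the server reports unexpected growth, and an unexpected jump additionally advances the netfs zero_point (the offset beyond which the server can only return zeros, enabling the "skip reads" optimisation from the pull message). A standalone C sketch of just that decision logic, with an invented struct in place of the kernel's inode/netfs_inode (a toy model, not the patch's code):

```c
#include <assert.h>
#include <stdbool.h>

/* Toy stand-in for the vnode fields the patch touches. */
struct toy_inode {
	long long i_size;		/* locally visible file size */
	long long remote_i_size;	/* size last reported by the server */
	long long zero_point;		/* reads at/after this yield zeros/EOF */
};

/* Mirror of the hunk's logic: always record the remote size; move the local
 * size when an explicit change was requested or the server size grew past it;
 * on an unexpected jump, also advance zero_point. */
static void apply_status_size(struct toy_inode *ino, long long server_size,
			      bool change_size, bool unexpected_jump)
{
	ino->remote_i_size = server_size;
	if (change_size || server_size > ino->i_size) {
		ino->i_size = server_size;
		if (unexpected_jump)
			ino->zero_point = server_size;
	}
}
```

Note the asymmetry: a smaller server size without an explicit change request does not shrink the local i_size, which may still cover locally dirty, not-yet-stored data.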
+1 -71
fs/afs/internal.h
···
 			    i_size_read(&vnode->netfs.inode), flags);
 }
 
-/*
- * We use folio->private to hold the amount of the folio that we've written to,
- * splitting the field into two parts.  However, we need to represent a range
- * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
- * exceeds what we can encode.
- */
-#ifdef CONFIG_64BIT
-#define __AFS_FOLIO_PRIV_MASK		0x7fffffffUL
-#define __AFS_FOLIO_PRIV_SHIFT		32
-#define __AFS_FOLIO_PRIV_MMAPPED	0x80000000UL
-#else
-#define __AFS_FOLIO_PRIV_MASK		0x7fffUL
-#define __AFS_FOLIO_PRIV_SHIFT		16
-#define __AFS_FOLIO_PRIV_MMAPPED	0x8000UL
-#endif
-
-static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
-{
-	int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
-	return (shift > 0) ? shift : 0;
-}
-
-static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
-{
-	unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
-
-	/* The lower bound is inclusive */
-	return x << afs_folio_dirty_resolution(folio);
-}
-
-static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
-{
-	unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
-
-	/* The upper bound is immediately beyond the region */
-	return (x + 1) << afs_folio_dirty_resolution(folio);
-}
-
-static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
-{
-	unsigned int res = afs_folio_dirty_resolution(folio);
-	from >>= res;
-	to = (to - 1) >> res;
-	return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
-}
-
-static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
-{
-	return priv | __AFS_FOLIO_PRIV_MMAPPED;
-}
-
-static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
-{
-	return priv & __AFS_FOLIO_PRIV_MMAPPED;
-}
-
 #include <trace/events/afs.h>
 
 /*****************************************************************************/
···
 extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
 extern struct afs_read *afs_alloc_read(gfp_t);
 extern void afs_put_read(struct afs_read *);
-extern int afs_write_inode(struct inode *, struct writeback_control *);
 
 static inline struct afs_read *afs_get_read(struct afs_read *req)
 {
···
 /*
  * write.c
  */
-#ifdef CONFIG_AFS_FSCACHE
-bool afs_dirty_folio(struct address_space *, struct folio *);
-#else
-#define afs_dirty_folio filemap_dirty_folio
-#endif
-extern int afs_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len,
-			struct page **pagep, void **fsdata);
-extern int afs_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata);
-extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
 extern void afs_prune_wb_keys(struct afs_vnode *);
-int afs_launder_folio(struct folio *);
+void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len);
 
 /*
  * xattr.c
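The internal.h hunk above deletes afs's private dirty-region bookkeeping: folio->private packed an inclusive "from" and an exclusive "to" byte offset into one word, dropping resolution for folios too large for the field widths, with a spare bit marking mmap-dirtied folios. Now that the netfs library tracks dirtiness, the scheme is gone; as a study aid, here is a faithful standalone replica of the removed 64-bit encoding (renamed, userspace C, assumes 64-bit unsigned long):

```c
#include <assert.h>
#include <stddef.h>

/* Replica of the removed folio->private dirty-range encoding (the
 * CONFIG_64BIT variant from fs/afs/internal.h). */
#define PRIV_MASK	0x7fffffffUL	/* 31 bits per bound */
#define PRIV_SHIFT	32		/* "to" lives in the high half */
#define PRIV_MMAPPED	0x80000000UL	/* dirtied via page_mkwrite() */

/* folio_shift stand-in: log2 of the folio size, e.g. 12 for a 4KiB folio.
 * Resolution is coarsened only when the folio exceeds the bound width. */
static unsigned int dirty_resolution(unsigned int folio_shift)
{
	int shift = folio_shift - (PRIV_SHIFT - 1);
	return shift > 0 ? shift : 0;
}

static unsigned long encode_dirty(unsigned int folio_shift, size_t from, size_t to)
{
	unsigned int res = dirty_resolution(folio_shift);
	from >>= res;
	to = (to - 1) >> res;	/* stored upper bound is inclusive */
	return (to << PRIV_SHIFT) | from;
}

static size_t dirty_from(unsigned int folio_shift, unsigned long priv)
{
	/* The lower bound is inclusive */
	return (priv & PRIV_MASK) << dirty_resolution(folio_shift);
}

static size_t dirty_to(unsigned int folio_shift, unsigned long priv)
{
	/* The upper bound is immediately beyond the region */
	return (((priv >> PRIV_SHIFT) & PRIV_MASK) + 1) << dirty_resolution(folio_shift);
}
```

For an ordinary 4KiB folio the resolution is byte-exact; the MMAPPED bit sits above the low bound's 31 bits, so setting it does not disturb either decoded offset.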
+1 -1
fs/afs/super.c
···
 static const struct super_operations afs_super_ops = {
 	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
-	.write_inode	= afs_write_inode,
+	.write_inode	= netfs_unpin_writeback,
 	.drop_inode	= afs_drop_inode,
 	.destroy_inode	= afs_destroy_inode,
 	.free_inode	= afs_free_inode,
+34 -802
fs/afs/write.c
···
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/netfs.h>
+#include <trace/events/netfs.h>
 #include "internal.h"
-
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 loff_t start, loff_t end, loff_t *_next,
-				 bool max_one_loop);
-
-static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
-			       loff_t i_size, bool caching);
-
-#ifdef CONFIG_AFS_FSCACHE
-/*
- * Mark a page as having been made dirty and thus needing writeback.  We also
- * need to pin the cache object to write back to.
- */
-bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
-	return fscache_dirty_folio(mapping, folio,
-				   afs_vnode_cache(AFS_FS_I(mapping->host)));
-}
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
-	if (caching)
-		folio_start_fscache(folio);
-}
-#else
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
-}
-#endif
-
-/*
- * Flush out a conflicting write.  This may extend the write to the surrounding
- * pages if also dirty and contiguous to the conflicting region..
- */
-static int afs_flush_conflicting_write(struct address_space *mapping,
-				       struct folio *folio)
-{
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_to_write	= LONG_MAX,
-		.range_start	= folio_pos(folio),
-		.range_end	= LLONG_MAX,
-	};
-	loff_t next;
-
-	return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX,
-				     &next, true);
-}
-
-/*
- * prepare to perform part of a write to a page
- */
-int afs_write_begin(struct file *file, struct address_space *mapping,
-		    loff_t pos, unsigned len,
-		    struct page **_page, void **fsdata)
-{
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	struct folio *folio;
-	unsigned long priv;
-	unsigned f, from;
-	unsigned t, to;
-	pgoff_t index;
-	int ret;
-
-	_enter("{%llx:%llu},%llx,%x",
-	       vnode->fid.vid, vnode->fid.vnode, pos, len);
-
-	/* Prefetch area to be written into the cache if we're caching this
-	 * file.  We need to do this before we get a lock on the page in case
-	 * there's more than one writer competing for the same cache block.
-	 */
-	ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
-	if (ret < 0)
-		return ret;
-
-	index = folio_index(folio);
-	from = pos - index * PAGE_SIZE;
-	to = from + len;
-
-try_again:
-	/* See if this page is already partially written in a way that we can
-	 * merge the new write with.
-	 */
-	if (folio_test_private(folio)) {
-		priv = (unsigned long)folio_get_private(folio);
-		f = afs_folio_dirty_from(folio, priv);
-		t = afs_folio_dirty_to(folio, priv);
-		ASSERTCMP(f, <=, t);
-
-		if (folio_test_writeback(folio)) {
-			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
-			folio_unlock(folio);
-			goto wait_for_writeback;
-		}
-		/* If the file is being filled locally, allow inter-write
-		 * spaces to be merged into writes.  If it's not, only write
-		 * back what the user gives us.
-		 */
-		if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
-		    (to < f || from > t))
-			goto flush_conflicting_write;
-	}
-
-	*_page = folio_file_page(folio, pos / PAGE_SIZE);
-	_leave(" = 0");
-	return 0;
-
-	/* The previous write and this write aren't adjacent or overlapping, so
-	 * flush the page out.
-	 */
-flush_conflicting_write:
-	trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio);
-	folio_unlock(folio);
-
-	ret = afs_flush_conflicting_write(mapping, folio);
-	if (ret < 0)
-		goto error;
-
-wait_for_writeback:
-	ret = folio_wait_writeback_killable(folio);
-	if (ret < 0)
-		goto error;
-
-	ret = folio_lock_killable(folio);
-	if (ret < 0)
-		goto error;
-	goto try_again;
-
-error:
-	folio_put(folio);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * finalise part of a write to a page
- */
-int afs_write_end(struct file *file, struct address_space *mapping,
-		  loff_t pos, unsigned len, unsigned copied,
-		  struct page *subpage, void *fsdata)
-{
-	struct folio *folio = page_folio(subpage);
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	unsigned long priv;
-	unsigned int f, from = offset_in_folio(folio, pos);
-	unsigned int t, to = from + copied;
-	loff_t i_size, write_end_pos;
-
-	_enter("{%llx:%llu},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
-	if (!folio_test_uptodate(folio)) {
-		if (copied < len) {
-			copied = 0;
-			goto out;
-		}
-
-		folio_mark_uptodate(folio);
-	}
-
-	if (copied == 0)
-		goto out;
-
-	write_end_pos = pos + copied;
-
-	i_size = i_size_read(&vnode->netfs.inode);
-	if (write_end_pos > i_size) {
-		write_seqlock(&vnode->cb_lock);
-		i_size = i_size_read(&vnode->netfs.inode);
-		if (write_end_pos > i_size)
-			afs_set_i_size(vnode, write_end_pos);
-		write_sequnlock(&vnode->cb_lock);
-		fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
-	}
-
-	if (folio_test_private(folio)) {
-		priv = (unsigned long)folio_get_private(folio);
-		f = afs_folio_dirty_from(folio, priv);
-		t = afs_folio_dirty_to(folio, priv);
-		if (from < f)
-			f = from;
-		if (to > t)
-			t = to;
-		priv = afs_folio_dirty(folio, f, t);
-		folio_change_private(folio, (void *)priv);
-		trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
-	} else {
-		priv = afs_folio_dirty(folio, from, to);
-		folio_attach_private(folio, (void *)priv);
-		trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
-	}
-
-	if (folio_mark_dirty(folio))
-		_debug("dirtied %lx", folio_index(folio));
-
-out:
-	folio_unlock(folio);
-	folio_put(folio);
-	return copied;
-}
-
-/*
- * kill all the pages in the given range
- */
-static void afs_kill_pages(struct address_space *mapping,
-			   loff_t start, loff_t len)
-{
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct folio *folio;
-	pgoff_t index = start / PAGE_SIZE;
-	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
-	_enter("{%llx:%llu},%llx @%llx",
-	       vnode->fid.vid, vnode->fid.vnode, len, start);
-
-	do {
-		_debug("kill %lx (to %lx)", index, last);
-
-		folio = filemap_get_folio(mapping, index);
-		if (IS_ERR(folio)) {
-			next = index + 1;
-			continue;
-		}
-
-		next = folio_next_index(folio);
-
-		folio_clear_uptodate(folio);
-		folio_end_writeback(folio);
-		folio_lock(folio);
-		generic_error_remove_folio(mapping, folio);
-		folio_unlock(folio);
-		folio_put(folio);
-
-	} while (index = next, index <= last);
-
-	_leave("");
-}
-
-/*
- * Redirty all the pages in a given range.
- */
-static void afs_redirty_pages(struct writeback_control *wbc,
-			      struct address_space *mapping,
-			      loff_t start, loff_t len)
-{
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct folio *folio;
-	pgoff_t index = start / PAGE_SIZE;
-	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
-	_enter("{%llx:%llu},%llx @%llx",
-	       vnode->fid.vid, vnode->fid.vnode, len, start);
-
-	do {
-		_debug("redirty %llx @%llx", len, start);
-
-		folio = filemap_get_folio(mapping, index);
-		if (IS_ERR(folio)) {
-			next = index + 1;
-			continue;
-		}
-
-		next = index + folio_nr_pages(folio);
-		folio_redirty_for_writepage(wbc, folio);
-		folio_end_writeback(folio);
-		folio_put(folio);
-	} while (index = next, index <= last);
-
-	_leave("");
-}
 
 /*
  * completion of write to server
  */
 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
 {
-	struct address_space *mapping = vnode->netfs.inode.i_mapping;
-	struct folio *folio;
-	pgoff_t end;
-
-	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
-
 	_enter("{%llx:%llu},{%x @%llx}",
 	       vnode->fid.vid, vnode->fid.vnode, len, start);
-
-	rcu_read_lock();
-
-	end = (start + len - 1) / PAGE_SIZE;
-	xas_for_each(&xas, folio, end) {
-		if (!folio_test_writeback(folio)) {
-			kdebug("bad %x @%llx page %lx %lx",
-			       len, start, folio_index(folio), end);
-			ASSERT(folio_test_writeback(folio));
-		}
-
-		trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
-		folio_detach_private(folio);
-		folio_end_writeback(folio);
-	}
-
-	rcu_read_unlock();
 
 	afs_prune_wb_keys(vnode);
 	_leave("");
···
 	return afs_put_operation(op);
 }
 
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- *
- * If this page holds new content, then we can include filler zeros in the
- * writeback.
- */
-static void afs_extend_writeback(struct address_space *mapping,
-				 struct afs_vnode *vnode,
-				 long *_count,
-				 loff_t start,
-				 loff_t max_len,
-				 bool new_content,
-				 bool caching,
-				 unsigned int *_len)
+static void afs_upload_to_server(struct netfs_io_subrequest *subreq)
 {
-	struct folio_batch fbatch;
-	struct folio *folio;
-	unsigned long priv;
-	unsigned int psize, filler = 0;
-	unsigned int f, t;
-	loff_t len = *_len;
-	pgoff_t index = (start + len) / PAGE_SIZE;
-	bool stop = true;
-	unsigned int i;
-
-	XA_STATE(xas, &mapping->i_pages, index);
-	folio_batch_init(&fbatch);
-
-	do {
-		/* Firstly, we gather up a batch of contiguous dirty pages
-		 * under the RCU read lock - but we can't clear the dirty flags
-		 * there if any of those pages are mapped.
-		 */
-		rcu_read_lock();
-
-		xas_for_each(&xas, folio, ULONG_MAX) {
-			stop = true;
-			if (xas_retry(&xas, folio))
-				continue;
-			if (xa_is_value(folio))
-				break;
-			if (folio_index(folio) != index)
-				break;
-
-			if (!folio_try_get_rcu(folio)) {
-				xas_reset(&xas);
-				continue;
-			}
-
-			/* Has the page moved or been split? */
-			if (unlikely(folio != xas_reload(&xas))) {
-				folio_put(folio);
-				break;
-			}
-
-			if (!folio_trylock(folio)) {
-				folio_put(folio);
-				break;
-			}
-			if (!folio_test_dirty(folio) ||
-			    folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			psize = folio_size(folio);
-			priv = (unsigned long)folio_get_private(folio);
-			f = afs_folio_dirty_from(folio, priv);
-			t = afs_folio_dirty_to(folio, priv);
-			if (f != 0 && !new_content) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			len += filler + t;
-			filler = psize - t;
-			if (len >= max_len || *_count <= 0)
-				stop = true;
-			else if (t == psize || new_content)
-				stop = false;
-
-			index += folio_nr_pages(folio);
-			if (!folio_batch_add(&fbatch, folio))
-				break;
-			if (stop)
-				break;
-		}
-
-		if (!stop)
-			xas_pause(&xas);
-		rcu_read_unlock();
-
-		/* Now, if we obtained any folios, we can shift them to being
-		 * writable and mark them for caching.
-		 */
-		if (!folio_batch_count(&fbatch))
-			break;
-
-		for (i = 0; i < folio_batch_count(&fbatch); i++) {
-			folio = fbatch.folios[i];
-			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			folio_start_writeback(folio);
-			afs_folio_start_fscache(caching, folio);
-
-			*_count -= folio_nr_pages(folio);
-			folio_unlock(folio);
-		}
-
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (!stop);
-
-	*_len = len;
-}
-
-/*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
- */
-static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
-						struct writeback_control *wbc,
-						struct folio *folio,
-						loff_t start, loff_t end)
-{
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct iov_iter iter;
-	unsigned long priv;
-	unsigned int offset, to, len, max_len;
-	loff_t i_size = i_size_read(&vnode->netfs.inode);
-	bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
-	bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
-	long count = wbc->nr_to_write;
-	int ret;
-
-	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
-
-	folio_start_writeback(folio);
-	afs_folio_start_fscache(caching, folio);
-
-	count -= folio_nr_pages(folio);
-
-	/* Find all consecutive lockable dirty pages that have contiguous
-	 * written regions, stopping when we find a page that is not
-	 * immediately lockable, is not dirty or is missing, or we reach the
-	 * end of the range.
-	 */
-	priv = (unsigned long)folio_get_private(folio);
-	offset = afs_folio_dirty_from(folio, priv);
-	to = afs_folio_dirty_to(folio, priv);
-	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
-
-	len = to - offset;
-	start += offset;
-	if (start < i_size) {
-		/* Trim the write to the EOF; the extra data is ignored.  Also
-		 * put an upper limit on the size of a single storedata op.
-		 */
-		max_len = 65536 * 4096;
-		max_len = min_t(unsigned long long, max_len, end - start + 1);
-		max_len = min_t(unsigned long long, max_len, i_size - start);
-
-		if (len < max_len &&
-		    (to == folio_size(folio) || new_content))
-			afs_extend_writeback(mapping, vnode, &count,
-					     start, max_len, new_content,
-					     caching, &len);
-		len = min_t(loff_t, len, max_len);
-	}
-
-	/* We now have a contiguous set of dirty pages, each with writeback
-	 * set; the first page is still locked at this point, but all the rest
-	 * have been unlocked.
-	 */
-	folio_unlock(folio);
-
-	if (start < i_size) {
-		_debug("write back %x @%llx [%llx]", len, start, i_size);
-
-		/* Speculatively write to the cache.  We have to fix this up
-		 * later if the store fails.
-		 */
-		afs_write_to_cache(vnode, start, len, i_size, caching);
-
-		iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
-		ret = afs_store_data(vnode, &iter, start, false);
-	} else {
-		_debug("write discard %x @%llx [%llx]", len, start, i_size);
-
-		/* The dirty region was entirely beyond the EOF. */
-		fscache_clear_page_bits(mapping, start, len, caching);
-		afs_pages_written_back(vnode, start, len);
-		ret = 0;
-	}
-
-	switch (ret) {
-	case 0:
-		wbc->nr_to_write = count;
-		ret = len;
-		break;
-
-	default:
-		pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
-		fallthrough;
-	case -EACCES:
-	case -EPERM:
-	case -ENOKEY:
-	case -EKEYEXPIRED:
-	case -EKEYREJECTED:
-	case -EKEYREVOKED:
-	case -ENETRESET:
-		afs_redirty_pages(wbc, mapping, start, len);
-		mapping_set_error(mapping, ret);
-		break;
-
-	case -EDQUOT:
-	case -ENOSPC:
-		afs_redirty_pages(wbc, mapping, start, len);
-		mapping_set_error(mapping, -ENOSPC);
-		break;
-
-	case -EROFS:
-	case -EIO:
-	case -EREMOTEIO:
-	case -EFBIG:
-	case -ENOENT:
-	case -ENOMEDIUM:
-	case -ENXIO:
-		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
-		afs_kill_pages(mapping, start, len);
-		mapping_set_error(mapping, ret);
-		break;
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * write a region of pages back to the server
- */
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 loff_t start, loff_t end, loff_t *_next,
-				 bool max_one_loop)
-{
-	struct folio *folio;
-	struct folio_batch fbatch;
+	struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
 	ssize_t ret;
-	unsigned int i;
-	int n, skips = 0;
 
-	_enter("%llx,%llx,", start, end);
-	folio_batch_init(&fbatch);
+	_enter("%x[%x],%zx",
+	       subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count);
 
-	do {
-		pgoff_t index = start / PAGE_SIZE;
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+	ret = afs_store_data(vnode, &subreq->io_iter, subreq->start,
+			     subreq->rreq->origin == NETFS_LAUNDER_WRITE);
+	netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len,
+					  false);
+}
 
-		n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					   PAGECACHE_TAG_DIRTY, &fbatch);
+static void afs_upload_to_server_worker(struct work_struct *work)
+{
+	struct netfs_io_subrequest *subreq =
+		container_of(work, struct netfs_io_subrequest, work);
 
-		if (!n)
-			break;
-		for (i = 0; i < n; i++) {
-			folio = fbatch.folios[i];
-			start = folio_pos(folio); /* May regress with THPs */
+	afs_upload_to_server(subreq);
+}
 
-			_debug("wback %lx", folio_index(folio));
+/*
+ * Set up write requests for a writeback slice.  We need to add a write request
+ * for each write we want to make.
+ */
+void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
+{
+	struct netfs_io_subrequest *subreq;
 
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-try_again:
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0) {
-					folio_batch_release(&fbatch);
-					return ret;
-				}
-			} else {
-				if (!folio_trylock(folio))
-					continue;
-			}
+	_enter("%x,%llx-%llx", wreq->debug_id, start, start + len);
 
-			if (folio->mapping != mapping ||
-			    !folio_test_dirty(folio)) {
-				start += folio_size(folio);
-				folio_unlock(folio);
-				continue;
-			}
-
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode != WB_SYNC_NONE) {
-					folio_wait_writeback(folio);
-#ifdef CONFIG_AFS_FSCACHE
-					folio_wait_fscache(folio);
-#endif
-					goto try_again;
-				}
-
-				start += folio_size(folio);
-				if (wbc->sync_mode == WB_SYNC_NONE) {
-					if (skips >= 5 || need_resched()) {
-						*_next = start;
-						folio_batch_release(&fbatch);
-						_leave(" = 0 [%llx]", *_next);
-						return 0;
-					}
-					skips++;
-				}
-				continue;
-			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			ret = afs_write_back_from_locked_folio(mapping, wbc,
-							       folio, start, end);
-			if (ret < 0) {
-				_leave(" = %zd", ret);
-				folio_batch_release(&fbatch);
-				return ret;
-			}
-
-			start += ret;
-		}
-
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (wbc->nr_to_write > 0);
-
-	*_next = start;
-	_leave(" = 0 [%llx]", *_next);
-	return 0;
+	subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
+					    start, len, afs_upload_to_server_worker);
+	if (subreq)
+		netfs_queue_write_request(subreq);
 }
 
 /*
  * write some of the pending data back to the server
  */
-int afs_writepages(struct address_space *mapping,
-		   struct writeback_control *wbc)
+int afs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	loff_t start, next;
 	int ret;
-
-	_enter("");
 
 	/* We have to be careful as we can end up racing with setattr()
 	 * truncating the pagecache since the caller doesn't take a lock here
···
 	else if (!down_read_trylock(&vnode->validate_lock))
 		return 0;
 
-	if (wbc->range_cyclic) {
-		start = mapping->writeback_index * PAGE_SIZE;
-		ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX,
-					    &next, false);
-		if (ret == 0) {
-			mapping->writeback_index = next / PAGE_SIZE;
-			if (start > 0 && wbc->nr_to_write > 0) {
-				ret = afs_writepages_region(mapping, wbc, 0,
-							    start, &next, false);
-				if (ret == 0)
228 - mapping->writeback_index = 229 - next / PAGE_SIZE; 230 - } 231 - } 232 - } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 233 - ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, 234 - &next, false); 235 - if (wbc->nr_to_write > 0 && ret == 0) 236 - mapping->writeback_index = next / PAGE_SIZE; 237 - } else { 238 - ret = afs_writepages_region(mapping, wbc, 239 - wbc->range_start, wbc->range_end, 240 - &next, false); 241 - } 242 - 820 + ret = netfs_writepages(mapping, wbc); 243 821 up_read(&vnode->validate_lock); 244 - _leave(" = %d", ret); 245 822 return ret; 246 - } 247 - 248 - /* 249 - * write to an AFS file 250 - */ 251 - ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) 252 - { 253 - struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); 254 - struct afs_file *af = iocb->ki_filp->private_data; 255 - ssize_t result; 256 - size_t count = iov_iter_count(from); 257 - 258 - _enter("{%llx:%llu},{%zu},", 259 - vnode->fid.vid, vnode->fid.vnode, count); 260 - 261 - if (IS_SWAPFILE(&vnode->netfs.inode)) { 262 - printk(KERN_INFO 263 - "AFS: Attempt to write to active swap file!\n"); 264 - return -EBUSY; 265 - } 266 - 267 - if (!count) 268 - return 0; 269 - 270 - result = afs_validate(vnode, af->key); 271 - if (result < 0) 272 - return result; 273 - 274 - result = generic_file_write_iter(iocb, from); 275 - 276 - _leave(" = %zd", result); 277 - return result; 278 823 } 279 824 280 825 /* ··· 248 907 */ 249 908 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) 250 909 { 251 - struct folio *folio = page_folio(vmf->page); 252 910 struct file *file = vmf->vma->vm_file; 253 - struct inode *inode = file_inode(file); 254 - struct afs_vnode *vnode = AFS_FS_I(inode); 255 - struct afs_file *af = file->private_data; 256 - unsigned long priv; 257 - vm_fault_t ret = VM_FAULT_RETRY; 258 911 259 - _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio)); 260 - 261 - afs_validate(vnode, af->key); 262 - 263 - 
sb_start_pagefault(inode->i_sb); 264 - 265 - /* Wait for the page to be written to the cache before we allow it to 266 - * be modified. We then assume the entire page will need writing back. 267 - */ 268 - #ifdef CONFIG_AFS_FSCACHE 269 - if (folio_test_fscache(folio) && 270 - folio_wait_fscache_killable(folio) < 0) 271 - goto out; 272 - #endif 273 - 274 - if (folio_wait_writeback_killable(folio)) 275 - goto out; 276 - 277 - if (folio_lock_killable(folio) < 0) 278 - goto out; 279 - 280 - /* We mustn't change folio->private until writeback is complete as that 281 - * details the portion of the page we need to write back and we might 282 - * need to redirty the page if there's a problem. 283 - */ 284 - if (folio_wait_writeback_killable(folio) < 0) { 285 - folio_unlock(folio); 286 - goto out; 287 - } 288 - 289 - priv = afs_folio_dirty(folio, 0, folio_size(folio)); 290 - priv = afs_folio_dirty_mmapped(priv); 291 - if (folio_test_private(folio)) { 292 - folio_change_private(folio, (void *)priv); 293 - trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio); 294 - } else { 295 - folio_attach_private(folio, (void *)priv); 296 - trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio); 297 - } 298 - file_update_time(file); 299 - 300 - ret = VM_FAULT_LOCKED; 301 - out: 302 - sb_end_pagefault(inode->i_sb); 303 - return ret; 912 + if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0) 913 + return VM_FAULT_SIGBUS; 914 + return netfs_page_mkwrite(vmf, NULL); 304 915 } 305 916 306 917 /* ··· 281 988 list_del(&wbk->vnode_link); 282 989 afs_put_wb_key(wbk); 283 990 } 284 - } 285 - 286 - /* 287 - * Clean up a page during invalidation. 
288 - */ 289 - int afs_launder_folio(struct folio *folio) 290 - { 291 - struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); 292 - struct iov_iter iter; 293 - struct bio_vec bv; 294 - unsigned long priv; 295 - unsigned int f, t; 296 - int ret = 0; 297 - 298 - _enter("{%lx}", folio->index); 299 - 300 - priv = (unsigned long)folio_get_private(folio); 301 - if (folio_clear_dirty_for_io(folio)) { 302 - f = 0; 303 - t = folio_size(folio); 304 - if (folio_test_private(folio)) { 305 - f = afs_folio_dirty_from(folio, priv); 306 - t = afs_folio_dirty_to(folio, priv); 307 - } 308 - 309 - bvec_set_folio(&bv, folio, t - f, f); 310 - iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, bv.bv_len); 311 - 312 - trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio); 313 - ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true); 314 - } 315 - 316 - trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio); 317 - folio_detach_private(folio); 318 - folio_wait_fscache(folio); 319 - return ret; 320 - } 321 - 322 - /* 323 - * Deal with the completion of writing the data to the cache. 324 - */ 325 - static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error, 326 - bool was_async) 327 - { 328 - struct afs_vnode *vnode = priv; 329 - 330 - if (IS_ERR_VALUE(transferred_or_error) && 331 - transferred_or_error != -ENOBUFS) 332 - afs_invalidate_cache(vnode, 0); 333 - } 334 - 335 - /* 336 - * Save the write to the cache also. 337 - */ 338 - static void afs_write_to_cache(struct afs_vnode *vnode, 339 - loff_t start, size_t len, loff_t i_size, 340 - bool caching) 341 - { 342 - fscache_write_to_cache(afs_vnode_cache(vnode), 343 - vnode->netfs.inode.i_mapping, start, len, i_size, 344 - afs_write_to_cache_done, vnode, caching); 345 991 }
+1 -1
fs/cachefiles/Kconfig
···

 config CACHEFILES
 	tristate "Filesystem caching on files"
-	depends on FSCACHE && BLOCK
+	depends on NETFS_SUPPORT && FSCACHE && BLOCK
 	help
 	  This permits use of a mounted filesystem as a cache for other
 	  filesystems - primarily networking filesystems - thus allowing fast
+1 -1
fs/cachefiles/internal.h
···
 					  enum fscache_want_state want_state);
 extern int __cachefiles_prepare_write(struct cachefiles_object *object,
 				      struct file *file,
-				      loff_t *_start, size_t *_len,
+				      loff_t *_start, size_t *_len, size_t upper_len,
 				      bool no_space_allocated_yet);
 extern int __cachefiles_write(struct cachefiles_object *object,
 			      struct file *file,
+21 -13
fs/cachefiles/io.c
···
  */
 int __cachefiles_prepare_write(struct cachefiles_object *object,
 			       struct file *file,
-			       loff_t *_start, size_t *_len,
+			       loff_t *_start, size_t *_len, size_t upper_len,
			       bool no_space_allocated_yet)
 {
 	struct cachefiles_cache *cache = object->volume->cache;
 	loff_t start = *_start, pos;
-	size_t len = *_len, down;
+	size_t len = *_len;
 	int ret;

 	/* Round to DIO size */
-	down = start - round_down(start, PAGE_SIZE);
-	*_start = start - down;
-	*_len = round_up(down + len, PAGE_SIZE);
+	start = round_down(*_start, PAGE_SIZE);
+	if (start != *_start || *_len > upper_len) {
+		/* Probably asked to cache a streaming write written into the
+		 * pagecache when the cookie was temporarily out of service to
+		 * culling.
+		 */
+		fscache_count_dio_misfit();
+		return -ENOBUFS;
+	}
+
+	*_len = round_up(len, PAGE_SIZE);

 	/* We need to work out whether there's sufficient disk space to perform
 	 * the write - but we can skip that check if we have space already
···

 	pos = cachefiles_inject_read_error();
 	if (pos == 0)
-		pos = vfs_llseek(file, *_start, SEEK_DATA);
+		pos = vfs_llseek(file, start, SEEK_DATA);
 	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
 		if (pos == -ENXIO)
 			goto check_space; /* Unallocated tail */
···
 					  cachefiles_trace_seek_error);
 		return pos;
 	}
-	if ((u64)pos >= (u64)*_start + *_len)
+	if ((u64)pos >= (u64)start + *_len)
 		goto check_space; /* Unallocated region */

 	/* We have a block that's at least partially filled - if we're low on
···

 	pos = cachefiles_inject_read_error();
 	if (pos == 0)
-		pos = vfs_llseek(file, *_start, SEEK_HOLE);
+		pos = vfs_llseek(file, start, SEEK_HOLE);
 	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
 		trace_cachefiles_io_error(object, file_inode(file), pos,
 					  cachefiles_trace_seek_error);
 		return pos;
 	}
-	if ((u64)pos >= (u64)*_start + *_len)
+	if ((u64)pos >= (u64)start + *_len)
 		return 0; /* Fully allocated */

 	/* Partially allocated, but insufficient space: cull. */
···
 	ret = cachefiles_inject_remove_error();
 	if (ret == 0)
 		ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-				    *_start, *_len);
+				    start, *_len);
 	if (ret < 0) {
 		trace_cachefiles_io_error(object, file_inode(file), ret,
 					  cachefiles_trace_fallocate_error);
···
 }

 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
-				    loff_t *_start, size_t *_len, loff_t i_size,
-				    bool no_space_allocated_yet)
+				    loff_t *_start, size_t *_len, size_t upper_len,
+				    loff_t i_size, bool no_space_allocated_yet)
 {
 	struct cachefiles_object *object = cachefiles_cres_object(cres);
 	struct cachefiles_cache *cache = object->volume->cache;
···

 	cachefiles_begin_secure(cache, &saved_cred);
 	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
-					 _start, _len,
+					 _start, _len, upper_len,
 					 no_space_allocated_yet);
 	cachefiles_end_secure(cache, saved_cred);
 	return ret;
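The behavioural change in __cachefiles_prepare_write() above is that a region whose start is not page-aligned, or whose length exceeds the new upper_len bound, is now rejected with -ENOBUFS (counted as a "DIO misfit") instead of being silently widened to page granularity. A minimal standalone sketch of that rule, in which the helper names, the fixed 4096-byte page size and the return convention are illustrative assumptions rather than the kernel interface:

```c
#include <assert.h>
#include <errno.h>

/* Illustrative stand-ins for the kernel's PAGE_SIZE and rounding helpers. */
#define CF_PAGE_SIZE 4096UL

static long cf_round_down(long v, unsigned long sz)
{
	return v & ~(long)(sz - 1);
}

static unsigned long cf_round_up(unsigned long v, unsigned long sz)
{
	return (v + sz - 1) & ~(sz - 1);
}

/* Sketch of the new check: reject a misaligned or overlong region with
 * -ENOBUFS; otherwise round the length up to whole pages for DIO. */
static int cf_prepare_write_check(long start, unsigned long *len,
				  unsigned long upper_len)
{
	if (cf_round_down(start, CF_PAGE_SIZE) != start || *len > upper_len)
		return -ENOBUFS;
	*len = cf_round_up(*len, CF_PAGE_SIZE);
	return 0;
}
```

An aligned 100-byte region under a 4096-byte bound is padded to one page, while a region starting at offset 10, or one longer than its bound, is refused outright, which is what lets the cache skip streaming writes it cannot store as direct I/O.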
+1 -1
fs/cachefiles/ondemand.c
···
 		return -ENOBUFS;

 	cachefiles_begin_secure(cache, &saved_cred);
-	ret = __cachefiles_prepare_write(object, file, &pos, &len, true);
+	ret = __cachefiles_prepare_write(object, file, &pos, &len, len, true);
 	cachefiles_end_secure(cache, saved_cred);
 	if (ret < 0)
 		return ret;
+2 -23
fs/ceph/addr.c
···
 		ceph_put_snap_context(snapc);
 	}

-	folio_wait_fscache(folio);
-}
-
-static bool ceph_release_folio(struct folio *folio, gfp_t gfp)
-{
-	struct inode *inode = folio->mapping->host;
-	struct ceph_client *cl = ceph_inode_to_client(inode);
-
-	doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode),
-	      folio->index, folio_test_dirty(folio) ? "" : "not ");
-
-	if (folio_test_private(folio))
-		return false;
-
-	if (folio_test_fscache(folio)) {
-		if (current_is_kswapd() || !(gfp & __GFP_FS))
-			return false;
-		folio_wait_fscache(folio);
-	}
-	ceph_fscache_note_page_release(inode);
-	return true;
+	netfs_invalidate_folio(folio, offset, length);
 }

 static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
···
 const struct netfs_request_ops ceph_netfs_ops = {
 	.init_request = ceph_init_request,
 	.free_request = ceph_netfs_free_request,
-	.begin_cache_operation = ceph_begin_cache_operation,
 	.issue_read = ceph_netfs_issue_read,
 	.expand_readahead = ceph_netfs_expand_readahead,
 	.clamp_length = ceph_netfs_clamp_length,
···
 	.write_end = ceph_write_end,
 	.dirty_folio = ceph_dirty_folio,
 	.invalidate_folio = ceph_invalidate_folio,
-	.release_folio = ceph_release_folio,
+	.release_folio = netfs_release_folio,
 	.direct_IO = noop_direct_IO,
 };
+7 -38
fs/ceph/cache.h
···
 	}
 }

-static inline void ceph_fscache_unpin_writeback(struct inode *inode,
+static inline int ceph_fscache_unpin_writeback(struct inode *inode,
 						struct writeback_control *wbc)
 {
-	fscache_unpin_writeback(wbc, ceph_fscache_cookie(ceph_inode(inode)));
+	return netfs_unpin_writeback(inode, wbc);
 }

-static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
-					   struct folio *folio)
-{
-	struct ceph_inode_info *ci = ceph_inode(mapping->host);
-
-	return fscache_dirty_folio(mapping, folio, ceph_fscache_cookie(ci));
-}
-
-static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq)
-{
-	struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode));
-
-	return fscache_begin_read_operation(&rreq->cache_resources, cookie);
-}
+#define ceph_fscache_dirty_folio netfs_dirty_folio

 static inline bool ceph_is_cache_enabled(struct inode *inode)
 {
 	return fscache_cookie_enabled(ceph_fscache_cookie(ceph_inode(inode)));
 }

-static inline void ceph_fscache_note_page_release(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	fscache_note_page_release(ceph_fscache_cookie(ci));
-}
 #else /* CONFIG_CEPH_FSCACHE */
 static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc,
 					   struct fs_context *fc)
···
 {
 }

-static inline void ceph_fscache_unpin_writeback(struct inode *inode,
-						struct writeback_control *wbc)
+static inline int ceph_fscache_unpin_writeback(struct inode *inode,
+					       struct writeback_control *wbc)
 {
+	return 0;
 }

-static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
-					   struct folio *folio)
-{
-	return filemap_dirty_folio(mapping, folio);
-}
+#define ceph_fscache_dirty_folio filemap_dirty_folio

 static inline bool ceph_is_cache_enabled(struct inode *inode)
 {
 	return false;
-}
-
-static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq)
-{
-	return -ENOBUFS;
-}
-
-static inline void ceph_fscache_note_page_release(struct inode *inode)
-{
 }
 #endif /* CONFIG_CEPH_FSCACHE */

+2 -2
fs/ceph/inode.c
···
 	doutc(fsc->client, "%p\n", &ci->netfs.inode);

 	/* Set parameters for the netfs library */
-	netfs_inode_init(&ci->netfs, &ceph_netfs_ops);
+	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);

 	spin_lock_init(&ci->i_ceph_lock);

···
 	percpu_counter_dec(&mdsc->metric.total_inodes);

 	truncate_inode_pages_final(&inode->i_data);
-	if (inode->i_state & I_PINNING_FSCACHE_WB)
+	if (inode->i_state & I_PINNING_NETFS_WB)
 		ceph_fscache_unuse_cookie(inode, true);
 	clear_inode(inode);

+5 -2
fs/erofs/Kconfig
···

 config EROFS_FS_ONDEMAND
 	bool "EROFS fscache-based on-demand read support"
-	depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
-	default n
+	depends on EROFS_FS
+	select NETFS_SUPPORT
+	select FSCACHE
+	select CACHEFILES
+	select CACHEFILES_ONDEMAND
 	help
 	  This permits EROFS to use fscache-backed data blobs with on-demand
 	  read support.
+5 -5
fs/fs-writeback.c
···

 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		inode->i_state |= I_DIRTY_PAGES;
-	else if (unlikely(inode->i_state & I_PINNING_FSCACHE_WB)) {
+	else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) {
 		if (!(inode->i_state & I_DIRTY_PAGES)) {
-			inode->i_state &= ~I_PINNING_FSCACHE_WB;
-			wbc->unpinned_fscache_wb = true;
-			dirty |= I_PINNING_FSCACHE_WB; /* Cause write_inode */
+			inode->i_state &= ~I_PINNING_NETFS_WB;
+			wbc->unpinned_netfs_wb = true;
+			dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */
 		}
 	}

···
 		if (ret == 0)
 			ret = err;
 	}
-	wbc->unpinned_fscache_wb = false;
+	wbc->unpinned_netfs_wb = false;
 	trace_writeback_single_inode(inode, wbc, nr_to_write);
 	return ret;
 }
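The fs-writeback.c hunk is a rename of the cache-pinning flag plus its existing logic: once an inode has no more dirty pages, a set I_PINNING_NETFS_WB (formerly I_PINNING_FSCACHE_WB) is dropped and fed back into the dirty mask to force ->write_inode(), letting the filesystem unuse its cache cookie. A standalone distillation of that state transition, where the struct names and flag values are illustrative assumptions rather than the kernel's definitions:

```c
#include <assert.h>
#include <stdbool.h>

/* Illustrative flag values; the kernel's I_* constants differ. */
#define I_DIRTY_PAGES		0x1
#define I_PINNING_NETFS_WB	0x2

struct demo_inode { unsigned i_state; };
struct demo_wbc { bool unpinned_netfs_wb; };

/* Sketch of the requeue step in __writeback_single_inode(): if the mapping
 * is still dirty, keep the pin; otherwise drop it, note the unpin in the
 * writeback control and return the flag so write_inode() gets called. */
static unsigned demo_requeue(struct demo_inode *inode, struct demo_wbc *wbc,
			     bool mapping_dirty)
{
	unsigned dirty = 0;

	if (mapping_dirty) {
		inode->i_state |= I_DIRTY_PAGES;
	} else if (inode->i_state & I_PINNING_NETFS_WB) {
		if (!(inode->i_state & I_DIRTY_PAGES)) {
			inode->i_state &= ~I_PINNING_NETFS_WB;
			wbc->unpinned_netfs_wb = true;
			dirty |= I_PINNING_NETFS_WB; /* cause write_inode */
		}
	}
	return dirty;
}
```

The design point carried over from the fscache version is that the pin is only released when no dirty pages remain, so the cache stays usable for as long as writeback may still need it.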
-40
fs/fscache/Kconfig
···
-# SPDX-License-Identifier: GPL-2.0-only
-
-config FSCACHE
-	tristate "General filesystem local caching manager"
-	select NETFS_SUPPORT
-	help
-	  This option enables a generic filesystem caching manager that can be
-	  used by various network and other filesystems to cache data locally.
-	  Different sorts of caches can be plugged in, depending on the
-	  resources available.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_STATS
-	bool "Gather statistical information on local caching"
-	depends on FSCACHE && PROC_FS
-	select NETFS_STATS
-	help
-	  This option causes statistical information to be gathered on local
-	  caching and exported through file:
-
-	  /proc/fs/fscache/stats
-
-	  The gathering of statistics adds a certain amount of overhead to
-	  execution as there are a quite a few stats gathered, and on a
-	  multi-CPU system these may be on cachelines that keep bouncing
-	  between CPUs. On the other hand, the stats are very useful for
-	  debugging purposes. Saying 'Y' here is recommended.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_DEBUG
-	bool "Debug FS-Cache"
-	depends on FSCACHE
-	help
-	  This permits debugging to be dynamically enabled in the local caching
-	  management module. If this is set, the debugging output may be
-	  enabled by setting bits in /sys/modules/fscache/parameter/debug.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
-16
fs/fscache/Makefile
···
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for general filesystem caching code
-#
-
-fscache-y := \
-	cache.o \
-	cookie.o \
-	io.o \
-	main.o \
-	volume.o
-
-fscache-$(CONFIG_PROC_FS) += proc.o
-fscache-$(CONFIG_FSCACHE_STATS) += stats.o
-
-obj-$(CONFIG_FSCACHE) := fscache.o
fs/fscache/cache.c → fs/netfs/fscache_cache.c
fs/fscache/cookie.c → fs/netfs/fscache_cookie.c
-277
fs/fscache/internal.h
···
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Internal definitions for FS-Cache
- *
- * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) "FS-Cache: " fmt
-
-#include <linux/slab.h>
-#include <linux/fscache-cache.h>
-#include <trace/events/fscache.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-
-/*
- * cache.c
- */
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_caches_seq_ops;
-#endif
-bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
-void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
-struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache);
-void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where);
-
-static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache)
-{
-	return smp_load_acquire(&cache->state);
-}
-
-static inline bool fscache_cache_is_live(const struct fscache_cache *cache)
-{
-	return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE;
-}
-
-static inline void fscache_set_cache_state(struct fscache_cache *cache,
-					   enum fscache_cache_state new_state)
-{
-	smp_store_release(&cache->state, new_state);
-
-}
-
-static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
-						 enum fscache_cache_state old_state,
-						 enum fscache_cache_state new_state)
-{
-	return try_cmpxchg_release(&cache->state, &old_state, new_state);
-}
-
-/*
- * cookie.c
- */
-extern struct kmem_cache *fscache_cookie_jar;
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_cookies_seq_ops;
-#endif
-extern struct timer_list fscache_cookie_lru_timer;
-
-extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
-extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
-					enum fscache_access_trace why);
-
-static inline void fscache_see_cookie(struct fscache_cookie *cookie,
-				      enum fscache_cookie_trace where)
-{
-	trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref),
-			     where);
-}
-
-/*
- * main.c
- */
-extern unsigned fscache_debug;
-
-extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len);
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int __init fscache_proc_init(void);
-extern void fscache_proc_cleanup(void);
-#else
-#define fscache_proc_init()	(0)
-#define fscache_proc_cleanup()	do {} while (0)
-#endif
-
-/*
- * stats.c
- */
-#ifdef CONFIG_FSCACHE_STATS
-extern atomic_t fscache_n_volumes;
-extern atomic_t fscache_n_volumes_collision;
-extern atomic_t fscache_n_volumes_nomem;
-extern atomic_t fscache_n_cookies;
-extern atomic_t fscache_n_cookies_lru;
-extern atomic_t fscache_n_cookies_lru_expired;
-extern atomic_t fscache_n_cookies_lru_removed;
-extern atomic_t fscache_n_cookies_lru_dropped;
-
-extern atomic_t fscache_n_acquires;
-extern atomic_t fscache_n_acquires_ok;
-extern atomic_t fscache_n_acquires_oom;
-
-extern atomic_t fscache_n_invalidates;
-
-extern atomic_t fscache_n_relinquishes;
-extern atomic_t fscache_n_relinquishes_retire;
-extern atomic_t fscache_n_relinquishes_dropped;
-
-extern atomic_t fscache_n_resizes;
-extern atomic_t fscache_n_resizes_null;
-
-static inline void fscache_stat(atomic_t *stat)
-{
-	atomic_inc(stat);
-}
-
-static inline void fscache_stat_d(atomic_t *stat)
-{
-	atomic_dec(stat);
-}
-
-#define __fscache_stat(stat) (stat)
-
-int fscache_stats_show(struct seq_file *m, void *v);
-#else
-
-#define __fscache_stat(stat) (NULL)
-#define fscache_stat(stat) do {} while (0)
-#define fscache_stat_d(stat) do {} while (0)
-#endif
-
-/*
- * volume.c
- */
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_volumes_seq_ops;
-#endif
-
-struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
-					  enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
-			enum fscache_volume_trace where);
-bool fscache_begin_volume_access(struct fscache_volume *volume,
-				 struct fscache_cookie *cookie,
-				 enum fscache_access_trace why);
-void fscache_create_volume(struct fscache_volume *volume, bool wait);
-
-
-/*****************************************************************************/
-/*
- * debug tracing
- */
-#define dbgprintk(FMT, ...) \
-	printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
-
-#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-
-#define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_FSCACHE_DEBUG)
-#define _enter(FMT, ...)			\
-do {						\
-	if (__do_kdebug(ENTER))			\
-		kenter(FMT, ##__VA_ARGS__);	\
-} while (0)
-
-#define _leave(FMT, ...)			\
-do {						\
-	if (__do_kdebug(LEAVE))			\
-		kleave(FMT, ##__VA_ARGS__);	\
-} while (0)
-
-#define _debug(FMT, ...)			\
-do {						\
-	if (__do_kdebug(DEBUG))			\
-		kdebug(FMT, ##__VA_ARGS__);	\
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
-/*
- * determine whether a particular optional debugging point should be logged
- * - we need to go through three steps to persuade cpp to correctly join the
- *   shorthand in FSCACHE_DEBUG_LEVEL with its prefix
- */
-#define ____do_kdebug(LEVEL, POINT)				\
-	unlikely((fscache_debug &				\
-		  (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3))))
-#define ___do_kdebug(LEVEL, POINT)		\
-	____do_kdebug(LEVEL, POINT)
-#define __do_kdebug(POINT)			\
-	___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT)
-
-#define FSCACHE_DEBUG_CACHE	0
-#define FSCACHE_DEBUG_COOKIE	1
-#define FSCACHE_DEBUG_OBJECT	2
-#define FSCACHE_DEBUG_OPERATION	3
-
-#define FSCACHE_POINT_ENTER	1
-#define FSCACHE_POINT_LEAVE	2
-#define FSCACHE_POINT_DEBUG	4
-
-#ifndef FSCACHE_DEBUG_LEVEL
-#define FSCACHE_DEBUG_LEVEL CACHE
-#endif
-
-/*
- * assertions
- */
-#if 1 /* defined(__KDEBUGALL) */
-
-#define ASSERT(X)						\
-do {								\
-	if (unlikely(!(X))) {					\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");			\
-		BUG();						\
-	}							\
-} while (0)
-
-#define ASSERTCMP(X, OP, Y)					\
-do {								\
-	if (unlikely(!((X) OP (Y)))) {				\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");			\
-		pr_err("%lx " #OP " %lx is false\n",		\
-		       (unsigned long)(X), (unsigned long)(Y));	\
-		BUG();						\
-	}							\
-} while (0)
-
-#define ASSERTIF(C, X)						\
-do {								\
-	if (unlikely((C) && !(X))) {				\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");			\
-		BUG();						\
-	}							\
-} while (0)
-
-#define ASSERTIFCMP(C, X, OP, Y)				\
-do {								\
-	if (unlikely((C) && !((X) OP (Y)))) {			\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");			\
-		pr_err("%lx " #OP " %lx is false\n",		\
-		       (unsigned long)(X), (unsigned long)(Y));	\
-		BUG();						\
-	}							\
-} while (0)
-
-#else
-
-#define ASSERT(X)			do {} while (0)
-#define ASSERTCMP(X, OP, Y)		do {} while (0)
-#define ASSERTIF(C, X)			do {} while (0)
-#define ASSERTIFCMP(C, X, OP, Y)	do {} while (0)
-
-#endif /* assert or not */
+1 -41
fs/fscache/io.c → fs/netfs/fscache_io.c
···
 }
 EXPORT_SYMBOL(__fscache_begin_write_operation);

-/**
- * fscache_dirty_folio - Mark folio dirty and pin a cache object for writeback
- * @mapping: The mapping the folio belongs to.
- * @folio: The folio being dirtied.
- * @cookie: The cookie referring to the cache object
- *
- * Set the dirty flag on a folio and pin an in-use cache object in memory
- * so that writeback can later write to it. This is intended
- * to be called from the filesystem's ->dirty_folio() method.
- *
- * Return: true if the dirty flag was set on the folio, false otherwise.
- */
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
-			 struct fscache_cookie *cookie)
-{
-	struct inode *inode = mapping->host;
-	bool need_use = false;
-
-	_enter("");
-
-	if (!filemap_dirty_folio(mapping, folio))
-		return false;
-	if (!fscache_cookie_valid(cookie))
-		return true;
-
-	if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
-		spin_lock(&inode->i_lock);
-		if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
-			inode->i_state |= I_PINNING_FSCACHE_WB;
-			need_use = true;
-		}
-		spin_unlock(&inode->i_lock);
-
-		if (need_use)
-			fscache_use_cookie(cookie, true);
-	}
-	return true;
-}
-EXPORT_SYMBOL(fscache_dirty_folio);
-
 struct fscache_write_request {
 	struct netfs_cache_resources cache_resources;
 	struct address_space *mapping;
···
 			      fscache_access_io_write) < 0)
 		goto abandon_free;

-	ret = cres->ops->prepare_write(cres, &start, &len, i_size, false);
+	ret = cres->ops->prepare_write(cres, &start, &len, len, i_size, false);
 	if (ret < 0)
 		goto abandon_end;

+6 -19
fs/fscache/main.c → fs/netfs/fscache_main.c
···
 #define FSCACHE_DEBUG_LEVEL CACHE
 #include <linux/module.h>
 #include <linux/init.h>
-#define CREATE_TRACE_POINTS
 #include "internal.h"
-
-MODULE_DESCRIPTION("FS Cache Manager");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-unsigned fscache_debug;
-module_param_named(debug, fscache_debug, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(fscache_debug,
-		 "FS-Cache debugging mask");
+#define CREATE_TRACE_POINTS
+#include <trace/events/fscache.h>

 EXPORT_TRACEPOINT_SYMBOL(fscache_access_cache);
 EXPORT_TRACEPOINT_SYMBOL(fscache_access_volume);
···
 /*
  * initialise the fs caching module
  */
-static int __init fscache_init(void)
+int __init fscache_init(void)
 {
 	int ret = -ENOMEM;

···
 		goto error_cookie_jar;
 	}

-	pr_notice("Loaded\n");
+	pr_notice("FS-Cache loaded\n");
 	return 0;

 error_cookie_jar:
···
 	return ret;
 }

-fs_initcall(fscache_init);
-
 /*
  * clean up on module removal
  */
-static void __exit fscache_exit(void)
+void __exit fscache_exit(void)
 {
 	_enter("");

 	kmem_cache_destroy(fscache_cookie_jar);
 	fscache_proc_cleanup();
 	destroy_workqueue(fscache_wq);
-	pr_notice("Unloaded\n");
+	pr_notice("FS-Cache unloaded\n");
 }
-
-module_exit(fscache_exit);
+8 -15
fs/fscache/proc.c → fs/netfs/fscache_proc.c
···
 #include "internal.h"

 /*
- * initialise the /proc/fs/fscache/ directory
+ * Add files to /proc/fs/netfs/.
  */
 int __init fscache_proc_init(void)
 {
-	if (!proc_mkdir("fs/fscache", NULL))
-		goto error_dir;
+	if (!proc_symlink("fs/fscache", NULL, "netfs"))
+		goto error_sym;

-	if (!proc_create_seq("fs/fscache/caches", S_IFREG | 0444, NULL,
+	if (!proc_create_seq("fs/netfs/caches", S_IFREG | 0444, NULL,
 			     &fscache_caches_seq_ops))
 		goto error;

-	if (!proc_create_seq("fs/fscache/volumes", S_IFREG | 0444, NULL,
+	if (!proc_create_seq("fs/netfs/volumes", S_IFREG | 0444, NULL,
 			     &fscache_volumes_seq_ops))
 		goto error;

-	if (!proc_create_seq("fs/fscache/cookies", S_IFREG | 0444, NULL,
+	if (!proc_create_seq("fs/netfs/cookies", S_IFREG | 0444, NULL,
 			     &fscache_cookies_seq_ops))
 		goto error;
-
-#ifdef CONFIG_FSCACHE_STATS
-	if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
-				fscache_stats_show))
-		goto error;
-#endif
-
 	return 0;

 error:
 	remove_proc_entry("fs/fscache", NULL);
-error_dir:
+error_sym:
 	return -ENOMEM;
 }

 /*
- * clean up the /proc/fs/fscache/ directory
+ * Clean up the /proc/fs/fscache symlink.
  */
 void fscache_proc_cleanup(void)
 {
+7 -6
fs/fscache/stats.c → fs/netfs/fscache_stats.c
··· 48 48 EXPORT_SYMBOL(fscache_n_no_create_space); 49 49 atomic_t fscache_n_culled; 50 50 EXPORT_SYMBOL(fscache_n_culled); 51 + atomic_t fscache_n_dio_misfit; 52 + EXPORT_SYMBOL(fscache_n_dio_misfit); 51 53 52 54 /* 53 55 * display the general statistics 54 56 */ 55 - int fscache_stats_show(struct seq_file *m, void *v) 57 + int fscache_stats_show(struct seq_file *m) 56 58 { 57 - seq_puts(m, "FS-Cache statistics\n"); 59 + seq_puts(m, "-- FS-Cache statistics --\n"); 58 60 seq_printf(m, "Cookies: n=%d v=%d vcol=%u voom=%u\n", 59 61 atomic_read(&fscache_n_cookies), 60 62 atomic_read(&fscache_n_volumes), ··· 95 93 atomic_read(&fscache_n_no_create_space), 96 94 atomic_read(&fscache_n_culled)); 97 95 98 - seq_printf(m, "IO : rd=%u wr=%u\n", 96 + seq_printf(m, "IO : rd=%u wr=%u mis=%u\n", 99 97 atomic_read(&fscache_n_read), 100 - atomic_read(&fscache_n_write)); 101 - 102 - netfs_stats_show(m); 98 + atomic_read(&fscache_n_write), 99 + atomic_read(&fscache_n_dio_misfit)); 103 100 return 0; 104 101 }
fs/fscache/volume.c → fs/netfs/fscache_volume.c
+39
fs/netfs/Kconfig
··· 21 21 multi-CPU system these may be on cachelines that keep bouncing 22 22 between CPUs. On the other hand, the stats are very useful for 23 23 debugging purposes. Saying 'Y' here is recommended. 24 + 25 + config FSCACHE 26 + bool "General filesystem local caching manager" 27 + depends on NETFS_SUPPORT 28 + help 29 + This option enables a generic filesystem caching manager that can be 30 + used by various network and other filesystems to cache data locally. 31 + Different sorts of caches can be plugged in, depending on the 32 + resources available. 33 + 34 + See Documentation/filesystems/caching/fscache.rst for more information. 35 + 36 + config FSCACHE_STATS 37 + bool "Gather statistical information on local caching" 38 + depends on FSCACHE && PROC_FS 39 + select NETFS_STATS 40 + help 41 + This option causes statistical information to be gathered on local 42 + caching and exported through file: 43 + 44 + /proc/fs/fscache/stats 45 + 46 + The gathering of statistics adds a certain amount of overhead to 47 + execution as there are a quite a few stats gathered, and on a 48 + multi-CPU system these may be on cachelines that keep bouncing 49 + between CPUs. On the other hand, the stats are very useful for 50 + debugging purposes. Saying 'Y' here is recommended. 51 + 52 + See Documentation/filesystems/caching/fscache.rst for more information. 53 + 54 + config FSCACHE_DEBUG 55 + bool "Debug FS-Cache" 56 + depends on FSCACHE 57 + help 58 + This permits debugging to be dynamically enabled in the local caching 59 + management module. If this is set, the debugging output may be 60 + enabled by setting bits in /sys/modules/fscache/parameter/debug. 61 + 62 + See Documentation/filesystems/caching/fscache.rst for more information.
+20 -2
fs/netfs/Makefile
··· 2 2 3 3 netfs-y := \ 4 4 buffered_read.o \ 5 + buffered_write.o \ 6 + direct_read.o \ 7 + direct_write.o \ 5 8 io.o \ 6 9 iterator.o \ 10 + locking.o \ 7 11 main.o \ 8 - objects.o 12 + misc.o \ 13 + objects.o \ 14 + output.o 9 15 10 16 netfs-$(CONFIG_NETFS_STATS) += stats.o 11 17 12 - obj-$(CONFIG_NETFS_SUPPORT) := netfs.o 18 + netfs-$(CONFIG_FSCACHE) += \ 19 + fscache_cache.o \ 20 + fscache_cookie.o \ 21 + fscache_io.o \ 22 + fscache_main.o \ 23 + fscache_volume.o 24 + 25 + ifeq ($(CONFIG_PROC_FS),y) 26 + netfs-$(CONFIG_FSCACHE) += fscache_proc.o 27 + endif 28 + netfs-$(CONFIG_FSCACHE_STATS) += fscache_stats.o 29 + 30 + obj-$(CONFIG_NETFS_SUPPORT) += netfs.o
+213 -16
fs/netfs/buffered_read.c
··· 16 16 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) 17 17 { 18 18 struct netfs_io_subrequest *subreq; 19 + struct netfs_folio *finfo; 19 20 struct folio *folio; 20 21 pgoff_t start_page = rreq->start / PAGE_SIZE; 21 22 pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; ··· 64 63 break; 65 64 } 66 65 if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { 66 + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 67 67 folio_start_fscache(folio); 68 68 folio_started = true; 69 69 } ··· 88 86 89 87 if (!pg_failed) { 90 88 flush_dcache_folio(folio); 89 + finfo = netfs_folio_info(folio); 90 + if (finfo) { 91 + trace_netfs_folio(folio, netfs_folio_trace_filled_gaps); 92 + if (finfo->netfs_group) 93 + folio_change_private(folio, finfo->netfs_group); 94 + else 95 + folio_detach_private(folio); 96 + kfree(finfo); 97 + } 91 98 folio_mark_uptodate(folio); 92 99 } 93 100 ··· 158 147 } 159 148 } 160 149 150 + /* 151 + * Begin an operation, and fetch the stored zero point value from the cookie if 152 + * available. 
153 + */ 154 + static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx) 155 + { 156 + return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx)); 157 + } 158 + 161 159 /** 162 160 * netfs_readahead - Helper to manage a read request 163 161 * @ractl: The description of the readahead request ··· 200 180 if (IS_ERR(rreq)) 201 181 return; 202 182 203 - if (ctx->ops->begin_cache_operation) { 204 - ret = ctx->ops->begin_cache_operation(rreq); 205 - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 206 - goto cleanup_free; 207 - } 183 + ret = netfs_begin_cache_read(rreq, ctx); 184 + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 185 + goto cleanup_free; 208 186 209 187 netfs_stat(&netfs_n_rh_readahead); 210 188 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), 211 189 netfs_read_trace_readahead); 212 190 213 191 netfs_rreq_expand(rreq, ractl); 192 + 193 + /* Set up the output buffer */ 194 + iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages, 195 + rreq->start, rreq->len); 214 196 215 197 /* Drop the refs on the folios here rather than in the cache or 216 198 * filesystem. The locks will be dropped in netfs_rreq_unlock(). 
··· 221 199 ; 222 200 223 201 netfs_begin_read(rreq, false); 202 + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 224 203 return; 225 204 226 205 cleanup_free: ··· 249 226 struct address_space *mapping = folio_file_mapping(folio); 250 227 struct netfs_io_request *rreq; 251 228 struct netfs_inode *ctx = netfs_inode(mapping->host); 229 + struct folio *sink = NULL; 252 230 int ret; 253 231 254 232 _enter("%lx", folio_index(folio)); ··· 262 238 goto alloc_error; 263 239 } 264 240 265 - if (ctx->ops->begin_cache_operation) { 266 - ret = ctx->ops->begin_cache_operation(rreq); 267 - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 268 - goto discard; 269 - } 241 + ret = netfs_begin_cache_read(rreq, ctx); 242 + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 243 + goto discard; 270 244 271 245 netfs_stat(&netfs_n_rh_readpage); 272 246 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); 273 - return netfs_begin_read(rreq, true); 247 + 248 + /* Set up the output buffer */ 249 + if (folio_test_dirty(folio)) { 250 + /* Handle someone trying to read from an unflushed streaming 251 + * write. We fiddle the buffer so that a gap at the beginning 252 + * and/or a gap at the end get copied to, but the middle is 253 + * discarded. 
254 + */ 255 + struct netfs_folio *finfo = netfs_folio_info(folio); 256 + struct bio_vec *bvec; 257 + unsigned int from = finfo->dirty_offset; 258 + unsigned int to = from + finfo->dirty_len; 259 + unsigned int off = 0, i = 0; 260 + size_t flen = folio_size(folio); 261 + size_t nr_bvec = flen / PAGE_SIZE + 2; 262 + size_t part; 263 + 264 + ret = -ENOMEM; 265 + bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL); 266 + if (!bvec) 267 + goto discard; 268 + 269 + sink = folio_alloc(GFP_KERNEL, 0); 270 + if (!sink) 271 + goto discard; 272 + 273 + trace_netfs_folio(folio, netfs_folio_trace_read_gaps); 274 + 275 + rreq->direct_bv = bvec; 276 + rreq->direct_bv_count = nr_bvec; 277 + if (from > 0) { 278 + bvec_set_folio(&bvec[i++], folio, from, 0); 279 + off = from; 280 + } 281 + while (off < to) { 282 + part = min_t(size_t, to - off, PAGE_SIZE); 283 + bvec_set_folio(&bvec[i++], sink, part, 0); 284 + off += part; 285 + } 286 + if (to < flen) 287 + bvec_set_folio(&bvec[i++], folio, flen - to, to); 288 + iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len); 289 + } else { 290 + iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, 291 + rreq->start, rreq->len); 292 + } 293 + 294 + ret = netfs_begin_read(rreq, true); 295 + if (sink) 296 + folio_put(sink); 297 + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 298 + return ret < 0 ? 
ret : 0; 274 299 275 300 discard: 276 301 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); ··· 463 390 rreq->no_unlock_folio = folio_index(folio); 464 391 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); 465 392 466 - if (ctx->ops->begin_cache_operation) { 467 - ret = ctx->ops->begin_cache_operation(rreq); 468 - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 469 - goto error_put; 470 - } 393 + ret = netfs_begin_cache_read(rreq, ctx); 394 + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 395 + goto error_put; 471 396 472 397 netfs_stat(&netfs_n_rh_write_begin); 473 398 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); ··· 476 405 ractl._nr_pages = folio_nr_pages(folio); 477 406 netfs_rreq_expand(rreq, &ractl); 478 407 408 + /* Set up the output buffer */ 409 + iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, 410 + rreq->start, rreq->len); 411 + 479 412 /* We hold the folio locks, so we can drop the references */ 480 413 folio_get(folio); 481 414 while (readahead_folio(&ractl)) ··· 488 413 ret = netfs_begin_read(rreq, true); 489 414 if (ret < 0) 490 415 goto error; 416 + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 491 417 492 418 have_folio: 493 419 ret = folio_wait_fscache_killable(folio); ··· 510 434 return ret; 511 435 } 512 436 EXPORT_SYMBOL(netfs_write_begin); 437 + 438 + /* 439 + * Preload the data into a page we're proposing to write into. 
440 + */ 441 + int netfs_prefetch_for_write(struct file *file, struct folio *folio, 442 + size_t offset, size_t len) 443 + { 444 + struct netfs_io_request *rreq; 445 + struct address_space *mapping = folio_file_mapping(folio); 446 + struct netfs_inode *ctx = netfs_inode(mapping->host); 447 + unsigned long long start = folio_pos(folio); 448 + size_t flen = folio_size(folio); 449 + int ret; 450 + 451 + _enter("%zx @%llx", flen, start); 452 + 453 + ret = -ENOMEM; 454 + 455 + rreq = netfs_alloc_request(mapping, file, start, flen, 456 + NETFS_READ_FOR_WRITE); 457 + if (IS_ERR(rreq)) { 458 + ret = PTR_ERR(rreq); 459 + goto error; 460 + } 461 + 462 + rreq->no_unlock_folio = folio_index(folio); 463 + __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); 464 + ret = netfs_begin_cache_read(rreq, ctx); 465 + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 466 + goto error_put; 467 + 468 + netfs_stat(&netfs_n_rh_write_begin); 469 + trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write); 470 + 471 + /* Set up the output buffer */ 472 + iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages, 473 + rreq->start, rreq->len); 474 + 475 + ret = netfs_begin_read(rreq, true); 476 + netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 477 + return ret; 478 + 479 + error_put: 480 + netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); 481 + error: 482 + _leave(" = %d", ret); 483 + return ret; 484 + } 485 + 486 + /** 487 + * netfs_buffered_read_iter - Filesystem buffered I/O read routine 488 + * @iocb: kernel I/O control block 489 + * @iter: destination for the data read 490 + * 491 + * This is the ->read_iter() routine for all filesystems that can use the page 492 + * cache directly. 493 + * 494 + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be 495 + * returned when no data can be read without waiting for I/O requests to 496 + * complete; it doesn't prevent readahead. 
497 + * 498 + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests 499 + * shall be made for the read or for readahead. When no data can be read, 500 + * -EAGAIN shall be returned. When readahead would be triggered, a partial, 501 + * possibly empty read shall be returned. 502 + * 503 + * Return: 504 + * * number of bytes copied, even for partial reads 505 + * * negative error code (or 0 if IOCB_NOIO) if nothing was read 506 + */ 507 + ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter) 508 + { 509 + struct inode *inode = file_inode(iocb->ki_filp); 510 + struct netfs_inode *ictx = netfs_inode(inode); 511 + ssize_t ret; 512 + 513 + if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) || 514 + test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))) 515 + return -EINVAL; 516 + 517 + ret = netfs_start_io_read(inode); 518 + if (ret == 0) { 519 + ret = filemap_read(iocb, iter, 0); 520 + netfs_end_io_read(inode); 521 + } 522 + return ret; 523 + } 524 + EXPORT_SYMBOL(netfs_buffered_read_iter); 525 + 526 + /** 527 + * netfs_file_read_iter - Generic filesystem read routine 528 + * @iocb: kernel I/O control block 529 + * @iter: destination for the data read 530 + * 531 + * This is the ->read_iter() routine for all filesystems that can use the page 532 + * cache directly. 533 + * 534 + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be 535 + * returned when no data can be read without waiting for I/O requests to 536 + * complete; it doesn't prevent readahead. 537 + * 538 + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests 539 + * shall be made for the read or for readahead. When no data can be read, 540 + * -EAGAIN shall be returned. When readahead would be triggered, a partial, 541 + * possibly empty read shall be returned. 
542 + * 543 + * Return: 544 + * * number of bytes copied, even for partial reads 545 + * * negative error code (or 0 if IOCB_NOIO) if nothing was read 546 + */ 547 + ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 548 + { 549 + struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host); 550 + 551 + if ((iocb->ki_flags & IOCB_DIRECT) || 552 + test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) 553 + return netfs_unbuffered_read_iter(iocb, iter); 554 + 555 + return netfs_buffered_read_iter(iocb, iter); 556 + } 557 + EXPORT_SYMBOL(netfs_file_read_iter);
+1253
fs/netfs/buffered_write.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Network filesystem high-level write support. 3 + * 4 + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 + * Written by David Howells (dhowells@redhat.com) 6 + */ 7 + 8 + #include <linux/export.h> 9 + #include <linux/fs.h> 10 + #include <linux/mm.h> 11 + #include <linux/pagemap.h> 12 + #include <linux/slab.h> 13 + #include <linux/pagevec.h> 14 + #include "internal.h" 15 + 16 + /* 17 + * Determined write method. Adjust netfs_folio_traces if this is changed. 18 + */ 19 + enum netfs_how_to_modify { 20 + NETFS_FOLIO_IS_UPTODATE, /* Folio is uptodate already */ 21 + NETFS_JUST_PREFETCH, /* We have to read the folio anyway */ 22 + NETFS_WHOLE_FOLIO_MODIFY, /* We're going to overwrite the whole folio */ 23 + NETFS_MODIFY_AND_CLEAR, /* We can assume there is no data to be downloaded. */ 24 + NETFS_STREAMING_WRITE, /* Store incomplete data in non-uptodate page. */ 25 + NETFS_STREAMING_WRITE_CONT, /* Continue streaming write. */ 26 + NETFS_FLUSH_CONTENT, /* Flush incompatible content. */ 27 + }; 28 + 29 + static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq); 30 + 31 + static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) 32 + { 33 + if (netfs_group && !folio_get_private(folio)) 34 + folio_attach_private(folio, netfs_get_group(netfs_group)); 35 + } 36 + 37 + #if IS_ENABLED(CONFIG_FSCACHE) 38 + static void netfs_folio_start_fscache(bool caching, struct folio *folio) 39 + { 40 + if (caching) 41 + folio_start_fscache(folio); 42 + } 43 + #else 44 + static void netfs_folio_start_fscache(bool caching, struct folio *folio) 45 + { 46 + } 47 + #endif 48 + 49 + /* 50 + * Decide how we should modify a folio. We might be attempting to do 51 + * write-streaming, in which case we don't want to do a local RMW cycle if we can 52 + * avoid it. If we're doing local caching or content crypto, we award that 53 + * priority over avoiding RMW.
If the file is open readably, then we also 54 + * assume that we may want to read what we wrote. 55 + */ 56 + static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, 57 + struct file *file, 58 + struct folio *folio, 59 + void *netfs_group, 60 + size_t flen, 61 + size_t offset, 62 + size_t len, 63 + bool maybe_trouble) 64 + { 65 + struct netfs_folio *finfo = netfs_folio_info(folio); 66 + loff_t pos = folio_file_pos(folio); 67 + 68 + _enter(""); 69 + 70 + if (netfs_folio_group(folio) != netfs_group) 71 + return NETFS_FLUSH_CONTENT; 72 + 73 + if (folio_test_uptodate(folio)) 74 + return NETFS_FOLIO_IS_UPTODATE; 75 + 76 + if (pos >= ctx->zero_point) 77 + return NETFS_MODIFY_AND_CLEAR; 78 + 79 + if (!maybe_trouble && offset == 0 && len >= flen) 80 + return NETFS_WHOLE_FOLIO_MODIFY; 81 + 82 + if (file->f_mode & FMODE_READ) 83 + goto no_write_streaming; 84 + if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) 85 + goto no_write_streaming; 86 + 87 + if (netfs_is_cache_enabled(ctx)) { 88 + /* We don't want to get a streaming write on a file that loses 89 + * caching service temporarily because the backing store got 90 + * culled. 91 + */ 92 + if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) 93 + set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags); 94 + goto no_write_streaming; 95 + } 96 + 97 + if (!finfo) 98 + return NETFS_STREAMING_WRITE; 99 + 100 + /* We can continue a streaming write only if it continues on from the 101 + * previous. If it overlaps, we must flush lest we suffer a partial 102 + * copy and disjoint dirty regions. 103 + */ 104 + if (offset == finfo->dirty_offset + finfo->dirty_len) 105 + return NETFS_STREAMING_WRITE_CONT; 106 + return NETFS_FLUSH_CONTENT; 107 + 108 + no_write_streaming: 109 + if (finfo) { 110 + netfs_stat(&netfs_n_wh_wstream_conflict); 111 + return NETFS_FLUSH_CONTENT; 112 + } 113 + return NETFS_JUST_PREFETCH; 114 + } 115 + 116 + /* 117 + * Grab a folio for writing and lock it. 
Attempt to allocate as large a folio 118 + * as possible to hold as much of the remaining length as possible in one go. 119 + */ 120 + static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, 121 + loff_t pos, size_t part) 122 + { 123 + pgoff_t index = pos / PAGE_SIZE; 124 + fgf_t fgp_flags = FGP_WRITEBEGIN; 125 + 126 + if (mapping_large_folio_support(mapping)) 127 + fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part); 128 + 129 + return __filemap_get_folio(mapping, index, fgp_flags, 130 + mapping_gfp_mask(mapping)); 131 + } 132 + 133 + /** 134 + * netfs_perform_write - Copy data into the pagecache. 135 + * @iocb: The operation parameters 136 + * @iter: The source buffer 137 + * @netfs_group: Grouping for dirty pages (eg. ceph snaps). 138 + * 139 + * Copy data into pagecache pages attached to the inode specified by @iocb. 140 + * The caller must hold appropriate inode locks. 141 + * 142 + * Dirty pages are tagged with a netfs_folio struct if they're not up to date 143 + * to indicate the range modified. Dirty pages may also be tagged with a 144 + * netfs-specific grouping such that data from an old group gets flushed before 145 + * a new one is started. 146 + */ 147 + ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, 148 + struct netfs_group *netfs_group) 149 + { 150 + struct file *file = iocb->ki_filp; 151 + struct inode *inode = file_inode(file); 152 + struct address_space *mapping = inode->i_mapping; 153 + struct netfs_inode *ctx = netfs_inode(inode); 154 + struct writeback_control wbc = { 155 + .sync_mode = WB_SYNC_NONE, 156 + .for_sync = true, 157 + .nr_to_write = LONG_MAX, 158 + .range_start = iocb->ki_pos, 159 + .range_end = iocb->ki_pos + iter->count, 160 + }; 161 + struct netfs_io_request *wreq = NULL; 162 + struct netfs_folio *finfo; 163 + struct folio *folio; 164 + enum netfs_how_to_modify howto; 165 + enum netfs_folio_trace trace; 166 + unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 
0: BDP_ASYNC; 167 + ssize_t written = 0, ret; 168 + loff_t i_size, pos = iocb->ki_pos, from, to; 169 + size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; 170 + bool maybe_trouble = false; 171 + 172 + if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || 173 + iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) 174 + ) { 175 + if (pos < i_size_read(inode)) { 176 + ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); 177 + if (ret < 0) { 178 + goto out; 179 + } 180 + } 181 + 182 + wbc_attach_fdatawrite_inode(&wbc, mapping->host); 183 + 184 + wreq = netfs_begin_writethrough(iocb, iter->count); 185 + if (IS_ERR(wreq)) { 186 + wbc_detach_inode(&wbc); 187 + ret = PTR_ERR(wreq); 188 + wreq = NULL; 189 + goto out; 190 + } 191 + if (!is_sync_kiocb(iocb)) 192 + wreq->iocb = iocb; 193 + wreq->cleanup = netfs_cleanup_buffered_write; 194 + } 195 + 196 + do { 197 + size_t flen; 198 + size_t offset; /* Offset into pagecache folio */ 199 + size_t part; /* Bytes to write to folio */ 200 + size_t copied; /* Bytes copied from user */ 201 + 202 + ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); 203 + if (unlikely(ret < 0)) 204 + break; 205 + 206 + offset = pos & (max_chunk - 1); 207 + part = min(max_chunk - offset, iov_iter_count(iter)); 208 + 209 + /* Bring in the user pages that we will copy from _first_ lest 210 + * we hit a nasty deadlock on copying from the same page as 211 + * we're writing to, without it being marked uptodate. 212 + * 213 + * Not only is this an optimisation, but it is also required to 214 + * check that the address is actually valid, when atomic 215 + * usercopies are used below. 216 + * 217 + * We rely on the page being held onto long enough by the LRU 218 + * that we can grab it below if this causes it to be read. 
219 + */ 220 + ret = -EFAULT; 221 + if (unlikely(fault_in_iov_iter_readable(iter, part) == part)) 222 + break; 223 + 224 + ret = -ENOMEM; 225 + folio = netfs_grab_folio_for_write(mapping, pos, part); 226 + if (!folio) 227 + break; 228 + 229 + flen = folio_size(folio); 230 + offset = pos & (flen - 1); 231 + part = min_t(size_t, flen - offset, part); 232 + 233 + if (signal_pending(current)) { 234 + ret = written ? -EINTR : -ERESTARTSYS; 235 + goto error_folio_unlock; 236 + } 237 + 238 + /* See if we need to prefetch the area we're going to modify. 239 + * We need to do this before we get a lock on the folio in case 240 + * there's more than one writer competing for the same cache 241 + * block. 242 + */ 243 + howto = netfs_how_to_modify(ctx, file, folio, netfs_group, 244 + flen, offset, part, maybe_trouble); 245 + _debug("howto %u", howto); 246 + switch (howto) { 247 + case NETFS_JUST_PREFETCH: 248 + ret = netfs_prefetch_for_write(file, folio, offset, part); 249 + if (ret < 0) { 250 + _debug("prefetch = %zd", ret); 251 + goto error_folio_unlock; 252 + } 253 + break; 254 + case NETFS_FOLIO_IS_UPTODATE: 255 + case NETFS_WHOLE_FOLIO_MODIFY: 256 + case NETFS_STREAMING_WRITE_CONT: 257 + break; 258 + case NETFS_MODIFY_AND_CLEAR: 259 + zero_user_segment(&folio->page, 0, offset); 260 + break; 261 + case NETFS_STREAMING_WRITE: 262 + ret = -EIO; 263 + if (WARN_ON(folio_get_private(folio))) 264 + goto error_folio_unlock; 265 + break; 266 + case NETFS_FLUSH_CONTENT: 267 + trace_netfs_folio(folio, netfs_flush_content); 268 + from = folio_pos(folio); 269 + to = from + folio_size(folio) - 1; 270 + folio_unlock(folio); 271 + folio_put(folio); 272 + ret = filemap_write_and_wait_range(mapping, from, to); 273 + if (ret < 0) 274 + goto error_folio_unlock; 275 + continue; 276 + } 277 + 278 + if (mapping_writably_mapped(mapping)) 279 + flush_dcache_folio(folio); 280 + 281 + copied = copy_folio_from_iter_atomic(folio, offset, part, iter); 282 + 283 + flush_dcache_folio(folio); 284 + 285 + 
/* Deal with a (partially) failed copy */ 286 + if (copied == 0) { 287 + ret = -EFAULT; 288 + goto error_folio_unlock; 289 + } 290 + 291 + trace = (enum netfs_folio_trace)howto; 292 + switch (howto) { 293 + case NETFS_FOLIO_IS_UPTODATE: 294 + case NETFS_JUST_PREFETCH: 295 + netfs_set_group(folio, netfs_group); 296 + break; 297 + case NETFS_MODIFY_AND_CLEAR: 298 + zero_user_segment(&folio->page, offset + copied, flen); 299 + netfs_set_group(folio, netfs_group); 300 + folio_mark_uptodate(folio); 301 + break; 302 + case NETFS_WHOLE_FOLIO_MODIFY: 303 + if (unlikely(copied < part)) { 304 + maybe_trouble = true; 305 + iov_iter_revert(iter, copied); 306 + copied = 0; 307 + goto retry; 308 + } 309 + netfs_set_group(folio, netfs_group); 310 + folio_mark_uptodate(folio); 311 + break; 312 + case NETFS_STREAMING_WRITE: 313 + if (offset == 0 && copied == flen) { 314 + netfs_set_group(folio, netfs_group); 315 + folio_mark_uptodate(folio); 316 + trace = netfs_streaming_filled_page; 317 + break; 318 + } 319 + finfo = kzalloc(sizeof(*finfo), GFP_KERNEL); 320 + if (!finfo) { 321 + iov_iter_revert(iter, copied); 322 + ret = -ENOMEM; 323 + goto error_folio_unlock; 324 + } 325 + finfo->netfs_group = netfs_get_group(netfs_group); 326 + finfo->dirty_offset = offset; 327 + finfo->dirty_len = copied; 328 + folio_attach_private(folio, (void *)((unsigned long)finfo | 329 + NETFS_FOLIO_INFO)); 330 + break; 331 + case NETFS_STREAMING_WRITE_CONT: 332 + finfo = netfs_folio_info(folio); 333 + finfo->dirty_len += copied; 334 + if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) { 335 + if (finfo->netfs_group) 336 + folio_change_private(folio, finfo->netfs_group); 337 + else 338 + folio_detach_private(folio); 339 + folio_mark_uptodate(folio); 340 + kfree(finfo); 341 + trace = netfs_streaming_cont_filled_page; 342 + } 343 + break; 344 + default: 345 + WARN(true, "Unexpected modify type %u ix=%lx\n", 346 + howto, folio_index(folio)); 347 + ret = -EIO; 348 + goto error_folio_unlock; 349 + } 350 
+ 351 + trace_netfs_folio(folio, trace); 352 + 353 + /* Update the inode size if we moved the EOF marker */ 354 + i_size = i_size_read(inode); 355 + pos += copied; 356 + if (pos > i_size) { 357 + if (ctx->ops->update_i_size) { 358 + ctx->ops->update_i_size(inode, pos); 359 + } else { 360 + i_size_write(inode, pos); 361 + #if IS_ENABLED(CONFIG_FSCACHE) 362 + fscache_update_cookie(ctx->cache, NULL, &pos); 363 + #endif 364 + } 365 + } 366 + written += copied; 367 + 368 + if (likely(!wreq)) { 369 + folio_mark_dirty(folio); 370 + } else { 371 + if (folio_test_dirty(folio)) 372 + /* Sigh. mmap. */ 373 + folio_clear_dirty_for_io(folio); 374 + /* We make multiple writes to the folio... */ 375 + if (!folio_test_writeback(folio)) { 376 + folio_wait_fscache(folio); 377 + folio_start_writeback(folio); 378 + folio_start_fscache(folio); 379 + if (wreq->iter.count == 0) 380 + trace_netfs_folio(folio, netfs_folio_trace_wthru); 381 + else 382 + trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); 383 + } 384 + netfs_advance_writethrough(wreq, copied, 385 + offset + copied == flen); 386 + } 387 + retry: 388 + folio_unlock(folio); 389 + folio_put(folio); 390 + folio = NULL; 391 + 392 + cond_resched(); 393 + } while (iov_iter_count(iter)); 394 + 395 + out: 396 + if (unlikely(wreq)) { 397 + ret = netfs_end_writethrough(wreq, iocb); 398 + wbc_detach_inode(&wbc); 399 + if (ret == -EIOCBQUEUED) 400 + return ret; 401 + } 402 + 403 + iocb->ki_pos += written; 404 + _leave(" = %zd [%zd]", written, ret); 405 + return written ? written : ret; 406 + 407 + error_folio_unlock: 408 + folio_unlock(folio); 409 + folio_put(folio); 410 + goto out; 411 + } 412 + EXPORT_SYMBOL(netfs_perform_write); 413 + 414 + /** 415 + * netfs_buffered_write_iter_locked - write data to a file 416 + * @iocb: IO state structure (file, offset, etc.) 417 + * @from: iov_iter with data to write 418 + * @netfs_group: Grouping for dirty pages (eg. ceph snaps). 
419 + * 420 + * This function does all the work needed for actually writing data to a 421 + * file. It does all basic checks, removes SUID from the file, updates 422 + * modification times and calls proper subroutines depending on whether we 423 + * do direct IO or a standard buffered write. 424 + * 425 + * The caller must hold appropriate locks around this function and have called 426 + * generic_write_checks() already. The caller is also responsible for doing 427 + * any necessary syncing afterwards. 428 + * 429 + * This function does *not* take care of syncing data in case of O_SYNC write. 430 + * A caller has to handle it. This is mainly due to the fact that we want to 431 + * avoid syncing under i_rwsem. 432 + * 433 + * Return: 434 + * * number of bytes written, even for truncated writes 435 + * * negative error code if no data has been written at all 436 + */ 437 + ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, 438 + struct netfs_group *netfs_group) 439 + { 440 + struct file *file = iocb->ki_filp; 441 + ssize_t ret; 442 + 443 + trace_netfs_write_iter(iocb, from); 444 + 445 + ret = file_remove_privs(file); 446 + if (ret) 447 + return ret; 448 + 449 + ret = file_update_time(file); 450 + if (ret) 451 + return ret; 452 + 453 + return netfs_perform_write(iocb, from, netfs_group); 454 + } 455 + EXPORT_SYMBOL(netfs_buffered_write_iter_locked); 456 + 457 + /** 458 + * netfs_file_write_iter - write data to a file 459 + * @iocb: IO state structure 460 + * @from: iov_iter with data to write 461 + * 462 + * Perform a write to a file, writing into the pagecache if possible and doing 463 + * an unbuffered write instead if not. 
464 + * 465 + * Return: 466 + * * Negative error code if no data has been written at all or 467 + * vfs_fsync_range() failed for a synchronous write 468 + * * Number of bytes written, even for truncated writes 469 + */ 470 + ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 471 + { 472 + struct file *file = iocb->ki_filp; 473 + struct inode *inode = file->f_mapping->host; 474 + struct netfs_inode *ictx = netfs_inode(inode); 475 + ssize_t ret; 476 + 477 + _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); 478 + 479 + if ((iocb->ki_flags & IOCB_DIRECT) || 480 + test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) 481 + return netfs_unbuffered_write_iter(iocb, from); 482 + 483 + ret = netfs_start_io_write(inode); 484 + if (ret < 0) 485 + return ret; 486 + 487 + ret = generic_write_checks(iocb, from); 488 + if (ret > 0) 489 + ret = netfs_buffered_write_iter_locked(iocb, from, NULL); 490 + netfs_end_io_write(inode); 491 + if (ret > 0) 492 + ret = generic_write_sync(iocb, ret); 493 + return ret; 494 + } 495 + EXPORT_SYMBOL(netfs_file_write_iter); 496 + 497 + /* 498 + * Notification that a previously read-only page is about to become writable. 499 + * Note that the caller indicates a single page of a multipage folio. 500 + */ 501 + vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group) 502 + { 503 + struct folio *folio = page_folio(vmf->page); 504 + struct file *file = vmf->vma->vm_file; 505 + struct inode *inode = file_inode(file); 506 + vm_fault_t ret = VM_FAULT_RETRY; 507 + int err; 508 + 509 + _enter("%lx", folio->index); 510 + 511 + sb_start_pagefault(inode->i_sb); 512 + 513 + if (folio_wait_writeback_killable(folio)) 514 + goto out; 515 + 516 + if (folio_lock_killable(folio) < 0) 517 + goto out; 518 + 519 + /* Can we see a streaming write here?
*/ 520 + if (WARN_ON(!folio_test_uptodate(folio))) { 521 + ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED; 522 + goto out; 523 + } 524 + 525 + if (netfs_folio_group(folio) != netfs_group) { 526 + folio_unlock(folio); 527 + err = filemap_fdatawait_range(inode->i_mapping, 528 + folio_pos(folio), 529 + folio_pos(folio) + folio_size(folio)); 530 + switch (err) { 531 + case 0: 532 + ret = VM_FAULT_RETRY; 533 + goto out; 534 + case -ENOMEM: 535 + ret = VM_FAULT_OOM; 536 + goto out; 537 + default: 538 + ret = VM_FAULT_SIGBUS; 539 + goto out; 540 + } 541 + } 542 + 543 + if (folio_test_dirty(folio)) 544 + trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus); 545 + else 546 + trace_netfs_folio(folio, netfs_folio_trace_mkwrite); 547 + netfs_set_group(folio, netfs_group); 548 + file_update_time(file); 549 + ret = VM_FAULT_LOCKED; 550 + out: 551 + sb_end_pagefault(inode->i_sb); 552 + return ret; 553 + } 554 + EXPORT_SYMBOL(netfs_page_mkwrite); 555 + 556 + /* 557 + * Kill all the pages in the given range 558 + */ 559 + static void netfs_kill_pages(struct address_space *mapping, 560 + loff_t start, loff_t len) 561 + { 562 + struct folio *folio; 563 + pgoff_t index = start / PAGE_SIZE; 564 + pgoff_t last = (start + len - 1) / PAGE_SIZE, next; 565 + 566 + _enter("%llx-%llx", start, start + len - 1); 567 + 568 + do { 569 + _debug("kill %lx (to %lx)", index, last); 570 + 571 + folio = filemap_get_folio(mapping, index); 572 + if (IS_ERR(folio)) { 573 + next = index + 1; 574 + continue; 575 + } 576 + 577 + next = folio_next_index(folio); 578 + 579 + trace_netfs_folio(folio, netfs_folio_trace_kill); 580 + folio_clear_uptodate(folio); 581 + if (folio_test_fscache(folio)) 582 + folio_end_fscache(folio); 583 + folio_end_writeback(folio); 584 + folio_lock(folio); 585 + generic_error_remove_folio(mapping, folio); 586 + folio_unlock(folio); 587 + folio_put(folio); 588 + 589 + } while (index = next, index <= last); 590 + 591 + _leave(""); 592 + } 593 + 594 + /* 595 + * Redirty all the pages in a 
given range. 596 + */ 597 + static void netfs_redirty_pages(struct address_space *mapping, 598 + loff_t start, loff_t len) 599 + { 600 + struct folio *folio; 601 + pgoff_t index = start / PAGE_SIZE; 602 + pgoff_t last = (start + len - 1) / PAGE_SIZE, next; 603 + 604 + _enter("%llx-%llx", start, start + len - 1); 605 + 606 + do { 607 + _debug("redirty %llx @%llx", len, start); 608 + 609 + folio = filemap_get_folio(mapping, index); 610 + if (IS_ERR(folio)) { 611 + next = index + 1; 612 + continue; 613 + } 614 + 615 + next = folio_next_index(folio); 616 + trace_netfs_folio(folio, netfs_folio_trace_redirty); 617 + filemap_dirty_folio(mapping, folio); 618 + if (folio_test_fscache(folio)) 619 + folio_end_fscache(folio); 620 + folio_end_writeback(folio); 621 + folio_put(folio); 622 + } while (index = next, index <= last); 623 + 624 + balance_dirty_pages_ratelimited(mapping); 625 + 626 + _leave(""); 627 + } 628 + 629 + /* 630 + * Completion of write to server 631 + */ 632 + static void netfs_pages_written_back(struct netfs_io_request *wreq) 633 + { 634 + struct address_space *mapping = wreq->mapping; 635 + struct netfs_folio *finfo; 636 + struct netfs_group *group = NULL; 637 + struct folio *folio; 638 + pgoff_t last; 639 + int gcount = 0; 640 + 641 + XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE); 642 + 643 + _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 644 + 645 + rcu_read_lock(); 646 + 647 + last = (wreq->start + wreq->len - 1) / PAGE_SIZE; 648 + xas_for_each(&xas, folio, last) { 649 + WARN(!folio_test_writeback(folio), 650 + "bad %zx @%llx page %lx %lx\n", 651 + wreq->len, wreq->start, folio_index(folio), last); 652 + 653 + if ((finfo = netfs_folio_info(folio))) { 654 + /* Streaming writes cannot be redirtied whilst under 655 + * writeback, so discard the streaming record. 
656 + */ 657 + folio_detach_private(folio); 658 + group = finfo->netfs_group; 659 + gcount++; 660 + trace_netfs_folio(folio, netfs_folio_trace_clear_s); 661 + kfree(finfo); 662 + } else if ((group = netfs_folio_group(folio))) { 663 + /* Need to detach the group pointer if the page didn't 664 + * get redirtied. If it has been redirtied, then it 665 + * must be within the same group. 666 + */ 667 + if (folio_test_dirty(folio)) { 668 + trace_netfs_folio(folio, netfs_folio_trace_redirtied); 669 + goto end_wb; 670 + } 671 + if (folio_trylock(folio)) { 672 + if (!folio_test_dirty(folio)) { 673 + folio_detach_private(folio); 674 + gcount++; 675 + trace_netfs_folio(folio, netfs_folio_trace_clear_g); 676 + } else { 677 + trace_netfs_folio(folio, netfs_folio_trace_redirtied); 678 + } 679 + folio_unlock(folio); 680 + goto end_wb; 681 + } 682 + 683 + xas_pause(&xas); 684 + rcu_read_unlock(); 685 + folio_lock(folio); 686 + if (!folio_test_dirty(folio)) { 687 + folio_detach_private(folio); 688 + gcount++; 689 + trace_netfs_folio(folio, netfs_folio_trace_clear_g); 690 + } else { 691 + trace_netfs_folio(folio, netfs_folio_trace_redirtied); 692 + } 693 + folio_unlock(folio); 694 + rcu_read_lock(); 695 + } else { 696 + trace_netfs_folio(folio, netfs_folio_trace_clear); 697 + } 698 + end_wb: 699 + if (folio_test_fscache(folio)) 700 + folio_end_fscache(folio); 701 + xas_advance(&xas, folio_next_index(folio) - 1); 702 + folio_end_writeback(folio); 703 + } 704 + 705 + rcu_read_unlock(); 706 + netfs_put_group_many(group, gcount); 707 + _leave(""); 708 + } 709 + 710 + /* 711 + * Deal with the disposition of the folios that are under writeback to close 712 + * out the operation. 
713 + */ 714 + static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq) 715 + { 716 + struct address_space *mapping = wreq->mapping; 717 + 718 + _enter(""); 719 + 720 + switch (wreq->error) { 721 + case 0: 722 + netfs_pages_written_back(wreq); 723 + break; 724 + 725 + default: 726 + pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error); 727 + fallthrough; 728 + case -EACCES: 729 + case -EPERM: 730 + case -ENOKEY: 731 + case -EKEYEXPIRED: 732 + case -EKEYREJECTED: 733 + case -EKEYREVOKED: 734 + case -ENETRESET: 735 + case -EDQUOT: 736 + case -ENOSPC: 737 + netfs_redirty_pages(mapping, wreq->start, wreq->len); 738 + break; 739 + 740 + case -EROFS: 741 + case -EIO: 742 + case -EREMOTEIO: 743 + case -EFBIG: 744 + case -ENOENT: 745 + case -ENOMEDIUM: 746 + case -ENXIO: 747 + netfs_kill_pages(mapping, wreq->start, wreq->len); 748 + break; 749 + } 750 + 751 + if (wreq->error) 752 + mapping_set_error(mapping, wreq->error); 753 + if (wreq->netfs_ops->done) 754 + wreq->netfs_ops->done(wreq); 755 + } 756 + 757 + /* 758 + * Extend the region to be written back to include subsequent contiguously 759 + * dirty pages if possible, but don't sleep while doing so. 760 + * 761 + * If this page holds new content, then we can include filler zeros in the 762 + * writeback. 
763 + */ 764 + static void netfs_extend_writeback(struct address_space *mapping, 765 + struct netfs_group *group, 766 + struct xa_state *xas, 767 + long *_count, 768 + loff_t start, 769 + loff_t max_len, 770 + bool caching, 771 + size_t *_len, 772 + size_t *_top) 773 + { 774 + struct netfs_folio *finfo; 775 + struct folio_batch fbatch; 776 + struct folio *folio; 777 + unsigned int i; 778 + pgoff_t index = (start + *_len) / PAGE_SIZE; 779 + size_t len; 780 + void *priv; 781 + bool stop = true; 782 + 783 + folio_batch_init(&fbatch); 784 + 785 + do { 786 + /* Firstly, we gather up a batch of contiguous dirty pages 787 + * under the RCU read lock - but we can't clear the dirty flags 788 + * there if any of those pages are mapped. 789 + */ 790 + rcu_read_lock(); 791 + 792 + xas_for_each(xas, folio, ULONG_MAX) { 793 + stop = true; 794 + if (xas_retry(xas, folio)) 795 + continue; 796 + if (xa_is_value(folio)) 797 + break; 798 + if (folio_index(folio) != index) { 799 + xas_reset(xas); 800 + break; 801 + } 802 + 803 + if (!folio_try_get_rcu(folio)) { 804 + xas_reset(xas); 805 + continue; 806 + } 807 + 808 + /* Has the folio moved or been split? 
*/ 809 + if (unlikely(folio != xas_reload(xas))) { 810 + folio_put(folio); 811 + xas_reset(xas); 812 + break; 813 + } 814 + 815 + if (!folio_trylock(folio)) { 816 + folio_put(folio); 817 + xas_reset(xas); 818 + break; 819 + } 820 + if (!folio_test_dirty(folio) || 821 + folio_test_writeback(folio) || 822 + folio_test_fscache(folio)) { 823 + folio_unlock(folio); 824 + folio_put(folio); 825 + xas_reset(xas); 826 + break; 827 + } 828 + 829 + stop = false; 830 + len = folio_size(folio); 831 + priv = folio_get_private(folio); 832 + if ((const struct netfs_group *)priv != group) { 833 + stop = true; 834 + finfo = netfs_folio_info(folio); 835 + if (finfo->netfs_group != group || 836 + finfo->dirty_offset > 0) { 837 + folio_unlock(folio); 838 + folio_put(folio); 839 + xas_reset(xas); 840 + break; 841 + } 842 + len = finfo->dirty_len; 843 + } 844 + 845 + *_top += folio_size(folio); 846 + index += folio_nr_pages(folio); 847 + *_count -= folio_nr_pages(folio); 848 + *_len += len; 849 + if (*_len >= max_len || *_count <= 0) 850 + stop = true; 851 + 852 + if (!folio_batch_add(&fbatch, folio)) 853 + break; 854 + if (stop) 855 + break; 856 + } 857 + 858 + xas_pause(xas); 859 + rcu_read_unlock(); 860 + 861 + /* Now, if we obtained any folios, we can shift them to being 862 + * writable and mark them for caching. 863 + */ 864 + if (!folio_batch_count(&fbatch)) 865 + break; 866 + 867 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 868 + folio = fbatch.folios[i]; 869 + trace_netfs_folio(folio, netfs_folio_trace_store_plus); 870 + 871 + if (!folio_clear_dirty_for_io(folio)) 872 + BUG(); 873 + folio_start_writeback(folio); 874 + netfs_folio_start_fscache(caching, folio); 875 + folio_unlock(folio); 876 + } 877 + 878 + folio_batch_release(&fbatch); 879 + cond_resched(); 880 + } while (!stop); 881 + } 882 + 883 + /* 884 + * Synchronously write back the locked page and any subsequent non-locked dirty 885 + * pages. 
886 + */ 887 + static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping, 888 + struct writeback_control *wbc, 889 + struct netfs_group *group, 890 + struct xa_state *xas, 891 + struct folio *folio, 892 + unsigned long long start, 893 + unsigned long long end) 894 + { 895 + struct netfs_io_request *wreq; 896 + struct netfs_folio *finfo; 897 + struct netfs_inode *ctx = netfs_inode(mapping->host); 898 + unsigned long long i_size = i_size_read(&ctx->inode); 899 + size_t len, max_len; 900 + bool caching = netfs_is_cache_enabled(ctx); 901 + long count = wbc->nr_to_write; 902 + int ret; 903 + 904 + _enter(",%lx,%llx-%llx,%u", folio_index(folio), start, end, caching); 905 + 906 + wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio), 907 + NETFS_WRITEBACK); 908 + if (IS_ERR(wreq)) { 909 + folio_unlock(folio); 910 + return PTR_ERR(wreq); 911 + } 912 + 913 + if (!folio_clear_dirty_for_io(folio)) 914 + BUG(); 915 + folio_start_writeback(folio); 916 + netfs_folio_start_fscache(caching, folio); 917 + 918 + count -= folio_nr_pages(folio); 919 + 920 + /* Find all consecutive lockable dirty pages that have contiguous 921 + * written regions, stopping when we find a page that is not 922 + * immediately lockable, is not dirty or is missing, or we reach the 923 + * end of the range. 924 + */ 925 + trace_netfs_folio(folio, netfs_folio_trace_store); 926 + 927 + len = wreq->len; 928 + finfo = netfs_folio_info(folio); 929 + if (finfo) { 930 + start += finfo->dirty_offset; 931 + if (finfo->dirty_offset + finfo->dirty_len != len) { 932 + len = finfo->dirty_len; 933 + goto cant_expand; 934 + } 935 + len = finfo->dirty_len; 936 + } 937 + 938 + if (start < i_size) { 939 + /* Trim the write to the EOF; the extra data is ignored. Also 940 + * put an upper limit on the size of a single storedata op. 
941 + */ 942 + max_len = 65536 * 4096; 943 + max_len = min_t(unsigned long long, max_len, end - start + 1); 944 + max_len = min_t(unsigned long long, max_len, i_size - start); 945 + 946 + if (len < max_len) 947 + netfs_extend_writeback(mapping, group, xas, &count, start, 948 + max_len, caching, &len, &wreq->upper_len); 949 + } 950 + 951 + cant_expand: 952 + len = min_t(unsigned long long, len, i_size - start); 953 + 954 + /* We now have a contiguous set of dirty pages, each with writeback 955 + * set; the first page is still locked at this point, but all the rest 956 + * have been unlocked. 957 + */ 958 + folio_unlock(folio); 959 + wreq->start = start; 960 + wreq->len = len; 961 + 962 + if (start < i_size) { 963 + _debug("write back %zx @%llx [%llx]", len, start, i_size); 964 + 965 + /* Speculatively write to the cache. We have to fix this up 966 + * later if the store fails. 967 + */ 968 + wreq->cleanup = netfs_cleanup_buffered_write; 969 + 970 + iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start, 971 + wreq->upper_len); 972 + __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 973 + ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback); 974 + if (ret == 0 || ret == -EIOCBQUEUED) 975 + wbc->nr_to_write -= len / PAGE_SIZE; 976 + } else { 977 + _debug("write discard %zx @%llx [%llx]", len, start, i_size); 978 + 979 + /* The dirty region was entirely beyond the EOF. 
*/ 980 + fscache_clear_page_bits(mapping, start, len, caching); 981 + netfs_pages_written_back(wreq); 982 + ret = 0; 983 + } 984 + 985 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 986 + _leave(" = 1"); 987 + return 1; 988 + } 989 + 990 + /* 991 + * Write a region of pages back to the server 992 + */ 993 + static ssize_t netfs_writepages_begin(struct address_space *mapping, 994 + struct writeback_control *wbc, 995 + struct netfs_group *group, 996 + struct xa_state *xas, 997 + unsigned long long *_start, 998 + unsigned long long end) 999 + { 1000 + const struct netfs_folio *finfo; 1001 + struct folio *folio; 1002 + unsigned long long start = *_start; 1003 + ssize_t ret; 1004 + void *priv; 1005 + int skips = 0; 1006 + 1007 + _enter("%llx,%llx,", start, end); 1008 + 1009 + search_again: 1010 + /* Find the first dirty page in the group. */ 1011 + rcu_read_lock(); 1012 + 1013 + for (;;) { 1014 + folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 1015 + if (xas_retry(xas, folio) || xa_is_value(folio)) 1016 + continue; 1017 + if (!folio) 1018 + break; 1019 + 1020 + if (!folio_try_get_rcu(folio)) { 1021 + xas_reset(xas); 1022 + continue; 1023 + } 1024 + 1025 + if (unlikely(folio != xas_reload(xas))) { 1026 + folio_put(folio); 1027 + xas_reset(xas); 1028 + continue; 1029 + } 1030 + 1031 + /* Skip any dirty folio that's not in the group of interest. 
*/ 1032 + priv = folio_get_private(folio); 1033 + if ((const struct netfs_group *)priv != group) { 1034 + finfo = netfs_folio_info(folio); 1035 + if (finfo->netfs_group != group) { 1036 + folio_put(folio); 1037 + continue; 1038 + } 1039 + } 1040 + 1041 + xas_pause(xas); 1042 + break; 1043 + } 1044 + rcu_read_unlock(); 1045 + if (!folio) 1046 + return 0; 1047 + 1048 + start = folio_pos(folio); /* May regress with THPs */ 1049 + 1050 + _debug("wback %lx", folio_index(folio)); 1051 + 1052 + /* At this point we hold neither the i_pages lock nor the page lock: 1053 + * the page may be truncated or invalidated (changing page->mapping to 1054 + * NULL), or even swizzled back from swapper_space to tmpfs file 1055 + * mapping 1056 + */ 1057 + lock_again: 1058 + if (wbc->sync_mode != WB_SYNC_NONE) { 1059 + ret = folio_lock_killable(folio); 1060 + if (ret < 0) 1061 + return ret; 1062 + } else { 1063 + if (!folio_trylock(folio)) 1064 + goto search_again; 1065 + } 1066 + 1067 + if (folio->mapping != mapping || 1068 + !folio_test_dirty(folio)) { 1069 + start += folio_size(folio); 1070 + folio_unlock(folio); 1071 + goto search_again; 1072 + } 1073 + 1074 + if (folio_test_writeback(folio) || 1075 + folio_test_fscache(folio)) { 1076 + folio_unlock(folio); 1077 + if (wbc->sync_mode != WB_SYNC_NONE) { 1078 + folio_wait_writeback(folio); 1079 + #ifdef CONFIG_FSCACHE 1080 + folio_wait_fscache(folio); 1081 + #endif 1082 + goto lock_again; 1083 + } 1084 + 1085 + start += folio_size(folio); 1086 + if (wbc->sync_mode == WB_SYNC_NONE) { 1087 + if (skips >= 5 || need_resched()) { 1088 + ret = 0; 1089 + goto out; 1090 + } 1091 + skips++; 1092 + } 1093 + goto search_again; 1094 + } 1095 + 1096 + ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas, 1097 + folio, start, end); 1098 + out: 1099 + if (ret > 0) 1100 + *_start = start + ret; 1101 + _leave(" = %zd [%llx]", ret, *_start); 1102 + return ret; 1103 + } 1104 + 1105 + /* 1106 + * Write a region of pages back to the server 1107 
+ */ 1108 + static int netfs_writepages_region(struct address_space *mapping, 1109 + struct writeback_control *wbc, 1110 + struct netfs_group *group, 1111 + unsigned long long *_start, 1112 + unsigned long long end) 1113 + { 1114 + ssize_t ret; 1115 + 1116 + XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 1117 + 1118 + do { 1119 + ret = netfs_writepages_begin(mapping, wbc, group, &xas, 1120 + _start, end); 1121 + if (ret > 0 && wbc->nr_to_write > 0) 1122 + cond_resched(); 1123 + } while (ret > 0 && wbc->nr_to_write > 0); 1124 + 1125 + return ret > 0 ? 0 : ret; 1126 + } 1127 + 1128 + /* 1129 + * write some of the pending data back to the server 1130 + */ 1131 + int netfs_writepages(struct address_space *mapping, 1132 + struct writeback_control *wbc) 1133 + { 1134 + struct netfs_group *group = NULL; 1135 + loff_t start, end; 1136 + int ret; 1137 + 1138 + _enter(""); 1139 + 1140 + /* We have to be careful as we can end up racing with setattr() 1141 + * truncating the pagecache since the caller doesn't take a lock here 1142 + * to prevent it. 
1143 + */ 1144 + 1145 + if (wbc->range_cyclic && mapping->writeback_index) { 1146 + start = mapping->writeback_index * PAGE_SIZE; 1147 + ret = netfs_writepages_region(mapping, wbc, group, 1148 + &start, LLONG_MAX); 1149 + if (ret < 0) 1150 + goto out; 1151 + 1152 + if (wbc->nr_to_write <= 0) { 1153 + mapping->writeback_index = start / PAGE_SIZE; 1154 + goto out; 1155 + } 1156 + 1157 + start = 0; 1158 + end = mapping->writeback_index * PAGE_SIZE; 1159 + mapping->writeback_index = 0; 1160 + ret = netfs_writepages_region(mapping, wbc, group, &start, end); 1161 + if (ret == 0) 1162 + mapping->writeback_index = start / PAGE_SIZE; 1163 + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 1164 + start = 0; 1165 + ret = netfs_writepages_region(mapping, wbc, group, 1166 + &start, LLONG_MAX); 1167 + if (wbc->nr_to_write > 0 && ret == 0) 1168 + mapping->writeback_index = start / PAGE_SIZE; 1169 + } else { 1170 + start = wbc->range_start; 1171 + ret = netfs_writepages_region(mapping, wbc, group, 1172 + &start, wbc->range_end); 1173 + } 1174 + 1175 + out: 1176 + _leave(" = %d", ret); 1177 + return ret; 1178 + } 1179 + EXPORT_SYMBOL(netfs_writepages); 1180 + 1181 + /* 1182 + * Deal with the disposition of a laundered folio. 1183 + */ 1184 + static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq) 1185 + { 1186 + if (wreq->error) { 1187 + pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error); 1188 + mapping_set_error(wreq->mapping, wreq->error); 1189 + } 1190 + } 1191 + 1192 + /** 1193 + * netfs_launder_folio - Clean up a dirty folio that's being invalidated 1194 + * @folio: The folio to clean 1195 + * 1196 + * This is called to write back a folio that's being invalidated when an inode 1197 + * is getting torn down. Ideally, writepages would be used instead. 
1198 + */ 1199 + int netfs_launder_folio(struct folio *folio) 1200 + { 1201 + struct netfs_io_request *wreq; 1202 + struct address_space *mapping = folio->mapping; 1203 + struct netfs_folio *finfo = netfs_folio_info(folio); 1204 + struct netfs_group *group = netfs_folio_group(folio); 1205 + struct bio_vec bvec; 1206 + unsigned long long i_size = i_size_read(mapping->host); 1207 + unsigned long long start = folio_pos(folio); 1208 + size_t offset = 0, len; 1209 + int ret = 0; 1210 + 1211 + if (finfo) { 1212 + offset = finfo->dirty_offset; 1213 + start += offset; 1214 + len = finfo->dirty_len; 1215 + } else { 1216 + len = folio_size(folio); 1217 + } 1218 + len = min_t(unsigned long long, len, i_size - start); 1219 + 1220 + wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE); 1221 + if (IS_ERR(wreq)) { 1222 + ret = PTR_ERR(wreq); 1223 + goto out; 1224 + } 1225 + 1226 + if (!folio_clear_dirty_for_io(folio)) 1227 + goto out_put; 1228 + 1229 + trace_netfs_folio(folio, netfs_folio_trace_launder); 1230 + 1231 + _debug("launder %llx-%llx", start, start + len - 1); 1232 + 1233 + /* Speculatively write to the cache. We have to fix this up later if 1234 + * the store fails. 1235 + */ 1236 + wreq->cleanup = netfs_cleanup_launder_folio; 1237 + 1238 + bvec_set_folio(&bvec, folio, len, offset); 1239 + iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len); 1240 + __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 1241 + ret = netfs_begin_write(wreq, true, netfs_write_trace_launder); 1242 + 1243 + out_put: 1244 + folio_detach_private(folio); 1245 + netfs_put_group(group); 1246 + kfree(finfo); 1247 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 1248 + out: 1249 + folio_wait_fscache(folio); 1250 + _leave(" = %d", ret); 1251 + return ret; 1252 + } 1253 + EXPORT_SYMBOL(netfs_launder_folio);
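The disposition switch in netfs_cleanup_buffered_write() above splits write-completion errors into two camps: conditions that may clear later (quota, key, or network trouble), where the pages are redirtied for another attempt, and hard failures, where the pages are killed. A userspace sketch of that classification — the enum and helper name are invented for illustration and are not kernel API:

```c
#include <errno.h>

/* Hypothetical disposition values mirroring the switch in
 * netfs_cleanup_buffered_write(); not part of the kernel API. */
enum wb_disposition { WB_DONE, WB_REDIRTY, WB_KILL };

static enum wb_disposition classify_wb_error(int error)
{
	switch (error) {
	case 0:
		return WB_DONE;
	case -EROFS:
	case -EIO:
	case -EREMOTEIO:
	case -EFBIG:
	case -ENOENT:
	case -ENOMEDIUM:
	case -ENXIO:
		/* Unrecoverable: discard the dirty data. */
		return WB_KILL;
	default:
		/* -EACCES, -ENOKEY, -ENOSPC, -EDQUOT, ...: the condition
		 * may clear, so keep the pages dirty for a later retry. */
		return WB_REDIRTY;
	}
}
```

Unknown errors deliberately land in the redirty camp, matching the `default: ... fallthrough;` into the retryable cases in the kernel switch.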
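netfs_kill_pages() and netfs_redirty_pages() above both convert a byte range into an inclusive span of page-cache indices before walking the folios. The arithmetic can be checked in isolation; this standalone sketch assumes a fixed 4KiB page size (the constant and helper name are stand-ins, not kernel symbols):

```c
#define SKETCH_PAGE_SIZE 4096ULL	/* stand-in for the kernel's PAGE_SIZE */

/* Sketch of the index arithmetic used by netfs_kill_pages() and
 * netfs_redirty_pages(): the byte range [start, start + len) maps to
 * the inclusive page-cache index range [*first, *last]. */
static void netfs_span_to_indices(unsigned long long start,
				  unsigned long long len,
				  unsigned long *first,
				  unsigned long *last)
{
	*first = start / SKETCH_PAGE_SIZE;
	*last = (start + len - 1) / SKETCH_PAGE_SIZE;
}
```

Note the `- 1` on the upper bound: a range ending exactly on a page boundary must not pull in the following page.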
fs/netfs/direct_read.c (+125 lines)
+ // SPDX-License-Identifier: GPL-2.0-or-later
+ /* Direct I/O support.
+  *
+  * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+  * Written by David Howells (dhowells@redhat.com)
+  */
+ 
+ #include <linux/export.h>
+ #include <linux/fs.h>
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ #include <linux/slab.h>
+ #include <linux/uio.h>
+ #include <linux/sched/mm.h>
+ #include <linux/task_io_accounting_ops.h>
+ #include <linux/netfs.h>
+ #include "internal.h"
+ 
+ /**
+  * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
+  * @iocb: The I/O control descriptor describing the read
+  * @iter: The output buffer (also specifies read length)
+  *
+  * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
+  * output buffer.  No use is made of the pagecache.
+  *
+  * The caller must hold any appropriate locks.
+  */
+ static ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
+ {
+ 	struct netfs_io_request *rreq;
+ 	ssize_t ret;
+ 	size_t orig_count = iov_iter_count(iter);
+ 	bool async = !is_sync_kiocb(iocb);
+ 
+ 	_enter("");
+ 
+ 	if (!orig_count)
+ 		return 0; /* Don't update atime */
+ 
+ 	ret = kiocb_write_and_wait(iocb, orig_count);
+ 	if (ret < 0)
+ 		return ret;
+ 	file_accessed(iocb->ki_filp);
+ 
+ 	rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
+ 				   iocb->ki_pos, orig_count,
+ 				   NETFS_DIO_READ);
+ 	if (IS_ERR(rreq))
+ 		return PTR_ERR(rreq);
+ 
+ 	netfs_stat(&netfs_n_rh_dio_read);
+ 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);
+ 
+ 	/* If this is an async op, we have to keep track of the destination
+ 	 * buffer for ourselves as the caller's iterator will be trashed when
+ 	 * we return.
+ 	 *
+ 	 * In such a case, extract an iterator to represent as much of the
+ 	 * output buffer as we can manage.  Note that the extraction might not
+ 	 * be able to allocate a sufficiently large bvec array and may shorten
+ 	 * the request.
+ 	 */
+ 	if (user_backed_iter(iter)) {
+ 		ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
+ 		if (ret < 0)
+ 			goto out;
+ 		rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
+ 		rreq->direct_bv_count = ret;
+ 		rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
+ 		rreq->len = iov_iter_count(&rreq->iter);
+ 	} else {
+ 		rreq->iter = *iter;
+ 		rreq->len = orig_count;
+ 		rreq->direct_bv_unpin = false;
+ 		iov_iter_advance(iter, orig_count);
+ 	}
+ 
+ 	// TODO: Set up bounce buffer if needed
+ 
+ 	if (async)
+ 		rreq->iocb = iocb;
+ 
+ 	ret = netfs_begin_read(rreq, is_sync_kiocb(iocb));
+ 	if (ret < 0)
+ 		goto out; /* May be -EIOCBQUEUED */
+ 	if (!async) {
+ 		// TODO: Copy from bounce buffer
+ 		iocb->ki_pos += rreq->transferred;
+ 		ret = rreq->transferred;
+ 	}
+ 
+ out:
+ 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ 	if (ret > 0)
+ 		orig_count -= ret;
+ 	if (ret != -EIOCBQUEUED)
+ 		iov_iter_revert(iter, orig_count - iov_iter_count(iter));
+ 	return ret;
+ }
+ 
+ /**
+  * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
+  * @iocb: The I/O control descriptor describing the read
+  * @iter: The output buffer (also specifies read length)
+  *
+  * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
+  * output buffer.  No use is made of the pagecache.
+  */
+ ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+ {
+ 	struct inode *inode = file_inode(iocb->ki_filp);
+ 	ssize_t ret;
+ 
+ 	if (!iter->count)
+ 		return 0; /* Don't update atime */
+ 
+ 	ret = netfs_start_io_direct(inode);
+ 	if (ret == 0) {
+ 		ret = netfs_unbuffered_read_iter_locked(iocb, iter);
+ 		netfs_end_io_direct(inode);
+ 	}
+ 	return ret;
+ }
+ EXPORT_SYMBOL(netfs_unbuffered_read_iter);
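The tail of netfs_unbuffered_read_iter_locked() shows the usual split between synchronous and asynchronous completion: a sync caller gets the transferred byte count back and an advanced file position, while an async caller gets -EIOCBQUEUED and is completed later through the iocb saved in rreq->iocb. A reduced, hypothetical model of that contract (plain values stand in for the kiocb and the request; the constant is the kernel-internal errno value 529):

```c
/* EIOCBQUEUED is a kernel-internal errno (529): it tells the caller
 * the request was queued and will complete asynchronously via
 * iocb->ki_complete. Redefined here since userspace errno.h lacks it. */
#define SKETCH_EIOCBQUEUED 529

/* Hypothetical helper (not in the kernel) mirroring the completion
 * logic at the end of netfs_unbuffered_read_iter_locked(). */
static long dio_read_result(int async, long transferred,
			    unsigned long long *ki_pos)
{
	if (async)
		return -SKETCH_EIOCBQUEUED;	/* completed out of line */
	*ki_pos += transferred;			/* sync: consume the count */
	return transferred;
}
```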
fs/netfs/direct_write.c (+171 lines)
+ // SPDX-License-Identifier: GPL-2.0-or-later
+ /* Unbuffered and direct write support.
+  *
+  * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+  * Written by David Howells (dhowells@redhat.com)
+  */
+ 
+ #include <linux/export.h>
+ #include <linux/uio.h>
+ #include "internal.h"
+ 
+ static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
+ {
+ 	struct inode *inode = wreq->inode;
+ 	unsigned long long end = wreq->start + wreq->len;
+ 
+ 	if (!wreq->error &&
+ 	    i_size_read(inode) < end) {
+ 		if (wreq->netfs_ops->update_i_size)
+ 			wreq->netfs_ops->update_i_size(inode, end);
+ 		else
+ 			i_size_write(inode, end);
+ 	}
+ }
+ 
+ /*
+  * Perform an unbuffered write where we may have to do an RMW operation on an
+  * encrypted file.  This can also be used for direct I/O writes.
+  */
+ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
+ 						  struct netfs_group *netfs_group)
+ {
+ 	struct netfs_io_request *wreq;
+ 	unsigned long long start = iocb->ki_pos;
+ 	unsigned long long end = start + iov_iter_count(iter);
+ 	ssize_t ret, n;
+ 	bool async = !is_sync_kiocb(iocb);
+ 
+ 	_enter("");
+ 
+ 	/* We're going to need a bounce buffer if what we transmit is going to
+ 	 * be different in some way to the source buffer, e.g. because it gets
+ 	 * encrypted/compressed or because it needs expanding to a block size.
+ 	 */
+ 	// TODO
+ 
+ 	_debug("uw %llx-%llx", start, end);
+ 
+ 	wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
+ 				   start, end - start,
+ 				   iocb->ki_flags & IOCB_DIRECT ?
+ 				   NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
+ 	if (IS_ERR(wreq))
+ 		return PTR_ERR(wreq);
+ 
+ 	{
+ 		/* If this is an async op and we're not using a bounce buffer,
+ 		 * we have to save the source buffer as the iterator is only
+ 		 * good until we return.  In such a case, extract an iterator
+ 		 * to represent as much of the output buffer as we can
+ 		 * manage.  Note that the extraction might not be able to
+ 		 * allocate a sufficiently large bvec array and may shorten the
+ 		 * request.
+ 		 */
+ 		if (async || user_backed_iter(iter)) {
+ 			n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0);
+ 			if (n < 0) {
+ 				ret = n;
+ 				goto out;
+ 			}
+ 			wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
+ 			wreq->direct_bv_count = n;
+ 			wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
+ 			wreq->len = iov_iter_count(&wreq->iter);
+ 		} else {
+ 			wreq->iter = *iter;
+ 		}
+ 
+ 		wreq->io_iter = wreq->iter;
+ 	}
+ 
+ 	/* Copy the data into the bounce buffer and encrypt it. */
+ 	// TODO
+ 
+ 	/* Dispatch the write. */
+ 	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+ 	if (async)
+ 		wreq->iocb = iocb;
+ 	wreq->cleanup = netfs_cleanup_dio_write;
+ 	ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
+ 				iocb->ki_flags & IOCB_DIRECT ?
+ 				netfs_write_trace_dio_write :
+ 				netfs_write_trace_unbuffered_write);
+ 	if (ret < 0) {
+ 		_debug("begin = %zd", ret);
+ 		goto out;
+ 	}
+ 
+ 	if (!async) {
+ 		trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
+ 		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+ 			    TASK_UNINTERRUPTIBLE);
+ 
+ 		ret = wreq->error;
+ 		_debug("waited = %zd", ret);
+ 		if (ret == 0) {
+ 			ret = wreq->transferred;
+ 			iocb->ki_pos += ret;
+ 		}
+ 	} else {
+ 		ret = -EIOCBQUEUED;
+ 	}
+ 
+ out:
+ 	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ 	return ret;
+ }
+ 
+ /**
+  * netfs_unbuffered_write_iter - Unbuffered write to a file
+  * @iocb: IO state structure
+  * @from: iov_iter with data to write
+  *
+  * Do an unbuffered write to a file, writing the data directly to the server
+  * and not lodging the data in the pagecache.
+  *
+  * Return:
+  * * Negative error code if no data has been written at all or
+  *   vfs_fsync_range() failed for a synchronous write
+  * * Number of bytes written, even for truncated writes
+  */
+ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ {
+ 	struct file *file = iocb->ki_filp;
+ 	struct inode *inode = file->f_mapping->host;
+ 	struct netfs_inode *ictx = netfs_inode(inode);
+ 	unsigned long long end;
+ 	ssize_t ret;
+ 
+ 	_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ 
+ 	trace_netfs_write_iter(iocb, from);
+ 	netfs_stat(&netfs_n_rh_dio_write);
+ 
+ 	ret = netfs_start_io_direct(inode);
+ 	if (ret < 0)
+ 		return ret;
+ 	ret = generic_write_checks(iocb, from);
+ 	if (ret < 0)
+ 		goto out;
+ 	ret = file_remove_privs(file);
+ 	if (ret < 0)
+ 		goto out;
+ 	ret = file_update_time(file);
+ 	if (ret < 0)
+ 		goto out;
+ 	ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
+ 	if (ret < 0)
+ 		goto out;
+ 	end = iocb->ki_pos + iov_iter_count(from);
+ 	if (end > ictx->zero_point)
+ 		ictx->zero_point = end;
+ 
+ 	fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
+ 			   FSCACHE_INVAL_DIO_WRITE);
+ 	ret = netfs_unbuffered_write_iter_locked(iocb, from, NULL);
+ out:
+ 	netfs_end_io_direct(inode);
+ 	return ret;
+ }
+ EXPORT_SYMBOL(netfs_unbuffered_write_iter);
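netfs_cleanup_dio_write() above only ever moves i_size forward, and only for a successful write whose end lies beyond the current size. That rule is easy to model with plain values; this helper is illustrative, not kernel API:

```c
/* Sketch of the i_size update in netfs_cleanup_dio_write(): a
 * successful direct write ending beyond the current file size extends
 * it; a failed write, or one landing inside the file, leaves it alone. */
static unsigned long long dio_new_i_size(unsigned long long i_size,
					 unsigned long long start,
					 unsigned long long len,
					 int error)
{
	unsigned long long end = start + len;

	if (!error && end > i_size)
		return end;	/* write extended the file */
	return i_size;
}
```

In the real code the filesystem can override the update via the optional ->update_i_size() op, e.g. to adjust block accounting at the same time.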
fs/netfs/fscache_internal.h (+14 lines)
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+ /* Internal definitions for FS-Cache
+  *
+  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+  * Written by David Howells (dhowells@redhat.com)
+  */
+ 
+ #include "internal.h"
+ 
+ #ifdef pr_fmt
+ #undef pr_fmt
+ #endif
+ 
+ #define pr_fmt(fmt) "FS-Cache: " fmt
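fscache_internal.h consists almost entirely of the pr_fmt override: any pr_*() call in a file that includes it picks up the "FS-Cache: " prefix through string-literal concatenation at the macro expansion site. A userspace sketch of the same pattern, with snprintf() into a buffer standing in for the kernel's pr_notice() (the buffer and macro here are inventions for this example; `##__VA_ARGS__` is the GNU extension the kernel itself relies on):

```c
#include <stdio.h>

/* Translation-unit-wide message prefix, spliced in by string-literal
 * pasting exactly as the kernel's pr_fmt convention does. */
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) "FS-Cache: " fmt

static char logbuf[128];

/* Minimal userspace stand-in for the kernel's pr_notice(). */
#define pr_notice(fmt, ...) \
	snprintf(logbuf, sizeof(logbuf), pr_fmt(fmt), ##__VA_ARGS__)
```

Because the prefix is applied by the macro, call sites stay unprefixed and the subsystem tag can be changed in one place.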
fs/netfs/internal.h (+284 lines)
··· 5 5 * Written by David Howells (dhowells@redhat.com) 6 6 */ 7 7 8 + #include <linux/slab.h> 9 + #include <linux/seq_file.h> 8 10 #include <linux/netfs.h> 9 11 #include <linux/fscache.h> 12 + #include <linux/fscache-cache.h> 10 13 #include <trace/events/netfs.h> 14 + #include <trace/events/fscache.h> 11 15 12 16 #ifdef pr_fmt 13 17 #undef pr_fmt ··· 23 19 * buffered_read.c 24 20 */ 25 21 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq); 22 + int netfs_prefetch_for_write(struct file *file, struct folio *folio, 23 + size_t offset, size_t len); 26 24 27 25 /* 28 26 * io.c ··· 35 29 * main.c 36 30 */ 37 31 extern unsigned int netfs_debug; 32 + extern struct list_head netfs_io_requests; 33 + extern spinlock_t netfs_proc_lock; 34 + 35 + #ifdef CONFIG_PROC_FS 36 + static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) 37 + { 38 + spin_lock(&netfs_proc_lock); 39 + list_add_tail_rcu(&rreq->proc_link, &netfs_io_requests); 40 + spin_unlock(&netfs_proc_lock); 41 + } 42 + static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) 43 + { 44 + if (!list_empty(&rreq->proc_link)) { 45 + spin_lock(&netfs_proc_lock); 46 + list_del_rcu(&rreq->proc_link); 47 + spin_unlock(&netfs_proc_lock); 48 + } 49 + } 50 + #else 51 + static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) {} 52 + static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} 53 + #endif 54 + 55 + /* 56 + * misc.c 57 + */ 58 + #define NETFS_FLAG_PUT_MARK BIT(0) 59 + #define NETFS_FLAG_PAGECACHE_MARK BIT(1) 60 + int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, 61 + struct folio *folio, unsigned int flags, 62 + gfp_t gfp_mask); 63 + int netfs_add_folios_to_buffer(struct xarray *buffer, 64 + struct address_space *mapping, 65 + pgoff_t index, pgoff_t to, gfp_t gfp_mask); 66 + void netfs_clear_buffer(struct xarray *buffer); 38 67 39 68 /* 40 69 * objects.c ··· 91 50 } 92 51 93 52 /* 53 + * output.c 54 + */ 55 + int netfs_begin_write(struct 
netfs_io_request *wreq, bool may_wait, 56 + enum netfs_write_trace what); 57 + struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); 58 + int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end); 59 + int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb); 60 + 61 + /* 94 62 * stats.c 95 63 */ 96 64 #ifdef CONFIG_NETFS_STATS 65 + extern atomic_t netfs_n_rh_dio_read; 66 + extern atomic_t netfs_n_rh_dio_write; 97 67 extern atomic_t netfs_n_rh_readahead; 98 68 extern atomic_t netfs_n_rh_readpage; 99 69 extern atomic_t netfs_n_rh_rreq; ··· 123 71 extern atomic_t netfs_n_rh_write_done; 124 72 extern atomic_t netfs_n_rh_write_failed; 125 73 extern atomic_t netfs_n_rh_write_zskip; 74 + extern atomic_t netfs_n_wh_wstream_conflict; 75 + extern atomic_t netfs_n_wh_upload; 76 + extern atomic_t netfs_n_wh_upload_done; 77 + extern atomic_t netfs_n_wh_upload_failed; 78 + extern atomic_t netfs_n_wh_write; 79 + extern atomic_t netfs_n_wh_write_done; 80 + extern atomic_t netfs_n_wh_write_failed; 126 81 82 + int netfs_stats_show(struct seq_file *m, void *v); 127 83 128 84 static inline void netfs_stat(atomic_t *stat) 129 85 { ··· 162 102 return false; 163 103 #endif 164 104 } 105 + 106 + /* 107 + * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap). 108 + */ 109 + static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group) 110 + { 111 + if (netfs_group) 112 + refcount_inc(&netfs_group->ref); 113 + return netfs_group; 114 + } 115 + 116 + /* 117 + * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap). 118 + */ 119 + static inline void netfs_put_group(struct netfs_group *netfs_group) 120 + { 121 + if (netfs_group && refcount_dec_and_test(&netfs_group->ref)) 122 + netfs_group->free(netfs_group); 123 + } 124 + 125 + /* 126 + * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap). 
127 + */ 128 + static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) 129 + { 130 + if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref)) 131 + netfs_group->free(netfs_group); 132 + } 133 + 134 + /* 135 + * fscache-cache.c 136 + */ 137 + #ifdef CONFIG_PROC_FS 138 + extern const struct seq_operations fscache_caches_seq_ops; 139 + #endif 140 + bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); 141 + void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); 142 + struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache); 143 + void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where); 144 + 145 + static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache) 146 + { 147 + return smp_load_acquire(&cache->state); 148 + } 149 + 150 + static inline bool fscache_cache_is_live(const struct fscache_cache *cache) 151 + { 152 + return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE; 153 + } 154 + 155 + static inline void fscache_set_cache_state(struct fscache_cache *cache, 156 + enum fscache_cache_state new_state) 157 + { 158 + smp_store_release(&cache->state, new_state); 159 + 160 + } 161 + 162 + static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache, 163 + enum fscache_cache_state old_state, 164 + enum fscache_cache_state new_state) 165 + { 166 + return try_cmpxchg_release(&cache->state, &old_state, new_state); 167 + } 168 + 169 + /* 170 + * fscache-cookie.c 171 + */ 172 + extern struct kmem_cache *fscache_cookie_jar; 173 + #ifdef CONFIG_PROC_FS 174 + extern const struct seq_operations fscache_cookies_seq_ops; 175 + #endif 176 + extern struct timer_list fscache_cookie_lru_timer; 177 + 178 + extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix); 179 + extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie, 180 + enum 
fscache_access_trace why); 181 + 182 + static inline void fscache_see_cookie(struct fscache_cookie *cookie, 183 + enum fscache_cookie_trace where) 184 + { 185 + trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref), 186 + where); 187 + } 188 + 189 + /* 190 + * fscache-main.c 191 + */ 192 + extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len); 193 + #ifdef CONFIG_FSCACHE 194 + int __init fscache_init(void); 195 + void __exit fscache_exit(void); 196 + #else 197 + static inline int fscache_init(void) { return 0; } 198 + static inline void fscache_exit(void) {} 199 + #endif 200 + 201 + /* 202 + * fscache-proc.c 203 + */ 204 + #ifdef CONFIG_PROC_FS 205 + extern int __init fscache_proc_init(void); 206 + extern void fscache_proc_cleanup(void); 207 + #else 208 + #define fscache_proc_init() (0) 209 + #define fscache_proc_cleanup() do {} while (0) 210 + #endif 211 + 212 + /* 213 + * fscache-stats.c 214 + */ 215 + #ifdef CONFIG_FSCACHE_STATS 216 + extern atomic_t fscache_n_volumes; 217 + extern atomic_t fscache_n_volumes_collision; 218 + extern atomic_t fscache_n_volumes_nomem; 219 + extern atomic_t fscache_n_cookies; 220 + extern atomic_t fscache_n_cookies_lru; 221 + extern atomic_t fscache_n_cookies_lru_expired; 222 + extern atomic_t fscache_n_cookies_lru_removed; 223 + extern atomic_t fscache_n_cookies_lru_dropped; 224 + 225 + extern atomic_t fscache_n_acquires; 226 + extern atomic_t fscache_n_acquires_ok; 227 + extern atomic_t fscache_n_acquires_oom; 228 + 229 + extern atomic_t fscache_n_invalidates; 230 + 231 + extern atomic_t fscache_n_relinquishes; 232 + extern atomic_t fscache_n_relinquishes_retire; 233 + extern atomic_t fscache_n_relinquishes_dropped; 234 + 235 + extern atomic_t fscache_n_resizes; 236 + extern atomic_t fscache_n_resizes_null; 237 + 238 + static inline void fscache_stat(atomic_t *stat) 239 + { 240 + atomic_inc(stat); 241 + } 242 + 243 + static inline void fscache_stat_d(atomic_t *stat) 244 + { 245 + 
atomic_dec(stat); 246 + } 247 + 248 + #define __fscache_stat(stat) (stat) 249 + 250 + int fscache_stats_show(struct seq_file *m); 251 + #else 252 + 253 + #define __fscache_stat(stat) (NULL) 254 + #define fscache_stat(stat) do {} while (0) 255 + #define fscache_stat_d(stat) do {} while (0) 256 + 257 + static inline int fscache_stats_show(struct seq_file *m) { return 0; } 258 + #endif 259 + 260 + /* 261 + * fscache-volume.c 262 + */ 263 + #ifdef CONFIG_PROC_FS 264 + extern const struct seq_operations fscache_volumes_seq_ops; 265 + #endif 266 + 267 + struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, 268 + enum fscache_volume_trace where); 269 + void fscache_put_volume(struct fscache_volume *volume, 270 + enum fscache_volume_trace where); 271 + bool fscache_begin_volume_access(struct fscache_volume *volume, 272 + struct fscache_cookie *cookie, 273 + enum fscache_access_trace why); 274 + void fscache_create_volume(struct fscache_volume *volume, bool wait); 165 275 166 276 /*****************************************************************************/ 167 277 /* ··· 373 143 #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) 374 144 #define _debug(FMT, ...) 
no_printk(FMT, ##__VA_ARGS__) 375 145 #endif 146 + 147 + /* 148 + * assertions 149 + */ 150 + #if 1 /* defined(__KDEBUGALL) */ 151 + 152 + #define ASSERT(X) \ 153 + do { \ 154 + if (unlikely(!(X))) { \ 155 + pr_err("\n"); \ 156 + pr_err("Assertion failed\n"); \ 157 + BUG(); \ 158 + } \ 159 + } while (0) 160 + 161 + #define ASSERTCMP(X, OP, Y) \ 162 + do { \ 163 + if (unlikely(!((X) OP (Y)))) { \ 164 + pr_err("\n"); \ 165 + pr_err("Assertion failed\n"); \ 166 + pr_err("%lx " #OP " %lx is false\n", \ 167 + (unsigned long)(X), (unsigned long)(Y)); \ 168 + BUG(); \ 169 + } \ 170 + } while (0) 171 + 172 + #define ASSERTIF(C, X) \ 173 + do { \ 174 + if (unlikely((C) && !(X))) { \ 175 + pr_err("\n"); \ 176 + pr_err("Assertion failed\n"); \ 177 + BUG(); \ 178 + } \ 179 + } while (0) 180 + 181 + #define ASSERTIFCMP(C, X, OP, Y) \ 182 + do { \ 183 + if (unlikely((C) && !((X) OP (Y)))) { \ 184 + pr_err("\n"); \ 185 + pr_err("Assertion failed\n"); \ 186 + pr_err("%lx " #OP " %lx is false\n", \ 187 + (unsigned long)(X), (unsigned long)(Y)); \ 188 + BUG(); \ 189 + } \ 190 + } while (0) 191 + 192 + #else 193 + 194 + #define ASSERT(X) do {} while (0) 195 + #define ASSERTCMP(X, OP, Y) do {} while (0) 196 + #define ASSERTIF(C, X) do {} while (0) 197 + #define ASSERTIFCMP(C, X, OP, Y) do {} while (0) 198 + 199 + #endif /* assert or not */
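The netfs_group helpers added to internal.h above follow the usual get/put refcounting idiom: a get bumps the count, a put drops it and runs the group's `->free()` callback when it reaches zero. A minimal userspace sketch of that pattern (hypothetical `struct group` with a plain `int` standing in for `refcount_t`; not the kernel types):

```c
#include <assert.h>
#include <stddef.h>

/* Hypothetical stand-in for struct netfs_group: an integer ref count plus
 * a destructor, mirroring the ->free() callback that netfs_put_group()
 * invokes when the count drops to zero. */
struct group {
	int ref;
	void (*free_fn)(struct group *);
};

static int frees;	/* how many times the destructor has run */

static void count_free(struct group *g)
{
	(void)g;
	frees++;
}

static struct group *group_get(struct group *g)
{
	if (g)
		g->ref++;	/* refcount_inc() in the kernel version */
	return g;
}

static void group_put(struct group *g)
{
	if (g && --g->ref == 0)	/* refcount_dec_and_test() */
		g->free_fn(g);
}
```

As in the kernel helpers, both functions tolerate a NULL group, which lets callers pass through an optional group pointer without checking it first.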
+170 -45
fs/netfs/io.c
··· 21 21 */ 22 22 static void netfs_clear_unread(struct netfs_io_subrequest *subreq) 23 23 { 24 - struct iov_iter iter; 25 - 26 - iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages, 27 - subreq->start + subreq->transferred, 28 - subreq->len - subreq->transferred); 29 - iov_iter_zero(iov_iter_count(&iter), &iter); 24 + iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter); 30 25 } 31 26 32 27 static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, ··· 41 46 enum netfs_read_from_hole read_hole) 42 47 { 43 48 struct netfs_cache_resources *cres = &rreq->cache_resources; 44 - struct iov_iter iter; 45 49 46 50 netfs_stat(&netfs_n_rh_read); 47 - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, 48 - subreq->start + subreq->transferred, 49 - subreq->len - subreq->transferred); 50 - 51 - cres->ops->read(cres, subreq->start, &iter, read_hole, 51 + cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole, 52 52 netfs_cache_read_terminated, subreq); 53 53 } 54 54 ··· 78 88 struct netfs_io_subrequest *subreq) 79 89 { 80 90 netfs_stat(&netfs_n_rh_download); 91 + 92 + if (rreq->origin != NETFS_DIO_READ && 93 + iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred) 94 + pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n", 95 + rreq->debug_id, subreq->debug_index, 96 + iov_iter_count(&subreq->io_iter), subreq->len, 97 + subreq->transferred, subreq->flags); 81 98 rreq->netfs_ops->issue_read(subreq); 82 99 } 83 100 ··· 126 129 */ 127 130 if (have_unlocked && folio_index(folio) <= unlocked) 128 131 continue; 129 - unlocked = folio_index(folio); 132 + unlocked = folio_next_index(folio) - 1; 133 + trace_netfs_folio(folio, netfs_folio_trace_end_copy); 130 134 folio_end_fscache(folio); 131 135 have_unlocked = true; 132 136 } ··· 199 201 } 200 202 201 203 ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, 202 - rreq->i_size, true); 204 + subreq->len, rreq->i_size, true); 203 205 
if (ret < 0) { 204 206 trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); 205 207 trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); ··· 258 260 } 259 261 260 262 /* 263 + * Reset the subrequest iterator prior to resubmission. 264 + */ 265 + static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, 266 + struct netfs_io_subrequest *subreq) 267 + { 268 + size_t remaining = subreq->len - subreq->transferred; 269 + size_t count = iov_iter_count(&subreq->io_iter); 270 + 271 + if (count == remaining) 272 + return; 273 + 274 + _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", 275 + rreq->debug_id, subreq->debug_index, 276 + iov_iter_count(&subreq->io_iter), subreq->transferred, 277 + subreq->len, rreq->i_size, 278 + subreq->io_iter.iter_type); 279 + 280 + if (count < remaining) 281 + iov_iter_revert(&subreq->io_iter, remaining - count); 282 + else 283 + iov_iter_advance(&subreq->io_iter, count - remaining); 284 + } 285 + 286 + /* 261 287 * Resubmit any short or failed operations. Returns true if we got the rreq 262 288 * ref back. 263 289 */ ··· 309 287 trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead); 310 288 netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 311 289 atomic_inc(&rreq->nr_outstanding); 290 + netfs_reset_subreq_iter(rreq, subreq); 312 291 netfs_read_from_server(rreq, subreq); 313 292 } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) { 314 293 netfs_rreq_short_read(rreq, subreq); ··· 344 321 } 345 322 346 323 /* 324 + * Determine how much we can admit to having read from a DIO read. 
325 + */ 326 + static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) 327 + { 328 + struct netfs_io_subrequest *subreq; 329 + unsigned int i; 330 + size_t transferred = 0; 331 + 332 + for (i = 0; i < rreq->direct_bv_count; i++) 333 + flush_dcache_page(rreq->direct_bv[i].bv_page); 334 + 335 + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 336 + if (subreq->error || subreq->transferred == 0) 337 + break; 338 + transferred += subreq->transferred; 339 + if (subreq->transferred < subreq->len) 340 + break; 341 + } 342 + 343 + for (i = 0; i < rreq->direct_bv_count; i++) 344 + flush_dcache_page(rreq->direct_bv[i].bv_page); 345 + 346 + rreq->transferred = transferred; 347 + task_io_account_read(transferred); 348 + 349 + if (rreq->iocb) { 350 + rreq->iocb->ki_pos += transferred; 351 + if (rreq->iocb->ki_complete) 352 + rreq->iocb->ki_complete( 353 + rreq->iocb, rreq->error ? rreq->error : transferred); 354 + } 355 + if (rreq->netfs_ops->done) 356 + rreq->netfs_ops->done(rreq); 357 + inode_dio_end(rreq->inode); 358 + } 359 + 360 + /* 347 361 * Assess the state of a read request and decide what to do next. 
348 362 * 349 363 * Note that we could be in an ordinary kernel thread, on a workqueue or in ··· 400 340 return; 401 341 } 402 342 403 - netfs_rreq_unlock_folios(rreq); 343 + if (rreq->origin != NETFS_DIO_READ) 344 + netfs_rreq_unlock_folios(rreq); 345 + else 346 + netfs_rreq_assess_dio(rreq); 404 347 348 + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); 405 349 clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); 406 350 wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); 407 351 ··· 463 399 struct netfs_io_request *rreq = subreq->rreq; 464 400 int u; 465 401 466 - _enter("[%u]{%llx,%lx},%zd", 467 - subreq->debug_index, subreq->start, subreq->flags, 468 - transferred_or_error); 402 + _enter("R=%x[%x]{%llx,%lx},%zd", 403 + rreq->debug_id, subreq->debug_index, 404 + subreq->start, subreq->flags, transferred_or_error); 469 405 470 406 switch (subreq->source) { 471 407 case NETFS_READ_FROM_CACHE: ··· 565 501 */ 566 502 static enum netfs_io_source 567 503 netfs_rreq_prepare_read(struct netfs_io_request *rreq, 568 - struct netfs_io_subrequest *subreq) 504 + struct netfs_io_subrequest *subreq, 505 + struct iov_iter *io_iter) 569 506 { 570 - enum netfs_io_source source; 507 + enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER; 508 + struct netfs_inode *ictx = netfs_inode(rreq->inode); 509 + size_t lsize; 571 510 572 511 _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); 573 512 574 - source = netfs_cache_prepare_read(subreq, rreq->i_size); 575 - if (source == NETFS_INVALID_READ) 576 - goto out; 513 + if (rreq->origin != NETFS_DIO_READ) { 514 + source = netfs_cache_prepare_read(subreq, rreq->i_size); 515 + if (source == NETFS_INVALID_READ) 516 + goto out; 517 + } 577 518 578 519 if (source == NETFS_DOWNLOAD_FROM_SERVER) { 579 520 /* Call out to the netfs to let it shrink the request to fit ··· 587 518 * to make serial calls, it can indicate a short read and then 588 519 * we will call it again. 
589 520 */ 521 + if (rreq->origin != NETFS_DIO_READ) { 522 + if (subreq->start >= ictx->zero_point) { 523 + source = NETFS_FILL_WITH_ZEROES; 524 + goto set; 525 + } 526 + if (subreq->len > ictx->zero_point - subreq->start) 527 + subreq->len = ictx->zero_point - subreq->start; 528 + } 590 529 if (subreq->len > rreq->i_size - subreq->start) 591 530 subreq->len = rreq->i_size - subreq->start; 531 + if (rreq->rsize && subreq->len > rreq->rsize) 532 + subreq->len = rreq->rsize; 592 533 593 534 if (rreq->netfs_ops->clamp_length && 594 535 !rreq->netfs_ops->clamp_length(subreq)) { 595 536 source = NETFS_INVALID_READ; 596 537 goto out; 597 538 } 539 + 540 + if (subreq->max_nr_segs) { 541 + lsize = netfs_limit_iter(io_iter, 0, subreq->len, 542 + subreq->max_nr_segs); 543 + if (subreq->len > lsize) { 544 + subreq->len = lsize; 545 + trace_netfs_sreq(subreq, netfs_sreq_trace_limited); 546 + } 547 + } 598 548 } 599 549 600 - if (WARN_ON(subreq->len == 0)) 601 - source = NETFS_INVALID_READ; 550 + set: 551 + if (subreq->len > rreq->len) 552 + pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n", 553 + rreq->debug_id, subreq->debug_index, 554 + subreq->len, rreq->len); 602 555 556 + if (WARN_ON(subreq->len == 0)) { 557 + source = NETFS_INVALID_READ; 558 + goto out; 559 + } 560 + 561 + subreq->source = source; 562 + trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); 563 + 564 + subreq->io_iter = *io_iter; 565 + iov_iter_truncate(&subreq->io_iter, subreq->len); 566 + iov_iter_advance(io_iter, subreq->len); 603 567 out: 604 568 subreq->source = source; 605 569 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); ··· 643 541 * Slice off a piece of a read request and submit an I/O request for it. 
644 542 */ 645 543 static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, 544 + struct iov_iter *io_iter, 646 545 unsigned int *_debug_index) 647 546 { 648 547 struct netfs_io_subrequest *subreq; ··· 655 552 656 553 subreq->debug_index = (*_debug_index)++; 657 554 subreq->start = rreq->start + rreq->submitted; 658 - subreq->len = rreq->len - rreq->submitted; 555 + subreq->len = io_iter->count; 659 556 660 557 _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted); 661 558 list_add_tail(&subreq->rreq_link, &rreq->subrequests); ··· 668 565 * (the starts must coincide), in which case, we go around the loop 669 566 * again and ask it to download the next piece. 670 567 */ 671 - source = netfs_rreq_prepare_read(rreq, subreq); 568 + source = netfs_rreq_prepare_read(rreq, subreq, io_iter); 672 569 if (source == NETFS_INVALID_READ) 673 570 goto subreq_failed; 674 571 ··· 706 603 */ 707 604 int netfs_begin_read(struct netfs_io_request *rreq, bool sync) 708 605 { 606 + struct iov_iter io_iter; 709 607 unsigned int debug_index = 0; 710 608 int ret; 711 609 ··· 715 611 716 612 if (rreq->len == 0) { 717 613 pr_err("Zero-sized read [R=%x]\n", rreq->debug_id); 718 - netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len); 719 614 return -EIO; 720 615 } 721 616 722 - INIT_WORK(&rreq->work, netfs_rreq_work); 617 + if (rreq->origin == NETFS_DIO_READ) 618 + inode_dio_begin(rreq->inode); 723 619 724 - if (sync) 725 - netfs_get_request(rreq, netfs_rreq_trace_get_hold); 620 + // TODO: Use bounce buffer if requested 621 + rreq->io_iter = rreq->iter; 622 + 623 + INIT_WORK(&rreq->work, netfs_rreq_work); 726 624 727 625 /* Chop the read into slices according to what the cache and the netfs 728 626 * want and submit each one. 
729 627 */ 628 + netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding); 730 629 atomic_set(&rreq->nr_outstanding, 1); 630 + io_iter = rreq->io_iter; 731 631 do { 732 - if (!netfs_rreq_submit_slice(rreq, &debug_index)) 632 + _debug("submit %llx + %zx >= %llx", 633 + rreq->start, rreq->submitted, rreq->i_size); 634 + if (rreq->origin == NETFS_DIO_READ && 635 + rreq->start + rreq->submitted >= rreq->i_size) 636 + break; 637 + if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index)) 638 + break; 639 + if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && 640 + test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) 733 641 break; 734 642 735 643 } while (rreq->submitted < rreq->len); 736 644 645 + if (!rreq->submitted) { 646 + netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); 647 + ret = 0; 648 + goto out; 649 + } 650 + 737 651 if (sync) { 738 - /* Keep nr_outstanding incremented so that the ref always belongs to 739 - * us, and the service code isn't punted off to a random thread pool to 740 - * process. 652 + /* Keep nr_outstanding incremented so that the ref always 653 + * belongs to us, and the service code isn't punted off to a 654 + * random thread pool to process. Note that this might start 655 + * further work, such as writing to the cache. 
741 656 */ 742 - for (;;) { 743 - wait_var_event(&rreq->nr_outstanding, 744 - atomic_read(&rreq->nr_outstanding) == 1); 657 + wait_var_event(&rreq->nr_outstanding, 658 + atomic_read(&rreq->nr_outstanding) == 1); 659 + if (atomic_dec_and_test(&rreq->nr_outstanding)) 745 660 netfs_rreq_assess(rreq, false); 746 - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) 747 - break; 748 - cond_resched(); 749 - } 661 + 662 + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); 663 + wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, 664 + TASK_UNINTERRUPTIBLE); 750 665 751 666 ret = rreq->error; 752 - if (ret == 0 && rreq->submitted < rreq->len) { 667 + if (ret == 0 && rreq->submitted < rreq->len && 668 + rreq->origin != NETFS_DIO_READ) { 753 669 trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); 754 670 ret = -EIO; 755 671 } 756 - netfs_put_request(rreq, false, netfs_rreq_trace_put_hold); 757 672 } else { 758 673 /* If we decrement nr_outstanding to 0, the ref belongs to us. */ 759 674 if (atomic_dec_and_test(&rreq->nr_outstanding)) 760 675 netfs_rreq_assess(rreq, false); 761 - ret = 0; 676 + ret = -EIOCBQUEUED; 762 677 } 678 + 679 + out: 763 680 return ret; 764 681 }
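The slice loop in netfs_begin_read() above carves the request into subrequests: each one is clamped (by the cache, by `rreq->rsize`, by `clamp_length()`), takes a truncated copy of the shared `io_iter`, and the parent iterator is advanced past it. A simplified userspace model of that carve-up, keeping only the length bookkeeping (hypothetical names; plain byte counts instead of `iov_iter`):

```c
#include <assert.h>
#include <stddef.h>

/* Model of the slice loop in netfs_begin_read(): each subrequest claims
 * min(remaining, rsize) bytes, the way netfs_rreq_prepare_read() clamps
 * subreq->len, and the parent position then moves past the slice the way
 * iov_iter_advance() moves the parent io_iter. */
static size_t slice_request(size_t len, size_t rsize, size_t *slices,
			    size_t max_slices)
{
	size_t submitted = 0, n = 0;

	while (submitted < len && n < max_slices) {
		size_t sl = len - submitted;

		if (rsize && sl > rsize)
			sl = rsize;	/* like subreq->len > rreq->rsize */
		slices[n++] = sl;	/* iov_iter_truncate() on the copy */
		submitted += sl;	/* iov_iter_advance() on the parent */
	}
	return n;
}
```

The loop terminates exactly when `submitted` reaches `len`, which is the condition `rreq->submitted < rreq->len` guarding the real loop.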
+97
fs/netfs/iterator.c
··· 101 101 return npages; 102 102 } 103 103 EXPORT_SYMBOL_GPL(netfs_extract_user_iter); 104 + 105 + /* 106 + * Select the span of a bvec iterator we're going to use. Limit it by both maximum 107 + * size and maximum number of segments. Returns the size of the span in bytes. 108 + */ 109 + static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset, 110 + size_t max_size, size_t max_segs) 111 + { 112 + const struct bio_vec *bvecs = iter->bvec; 113 + unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 114 + size_t len, span = 0, n = iter->count; 115 + size_t skip = iter->iov_offset + start_offset; 116 + 117 + if (WARN_ON(!iov_iter_is_bvec(iter)) || 118 + WARN_ON(start_offset > n) || 119 + n == 0) 120 + return 0; 121 + 122 + while (n && ix < nbv && skip) { 123 + len = bvecs[ix].bv_len; 124 + if (skip < len) 125 + break; 126 + skip -= len; 127 + n -= len; 128 + ix++; 129 + } 130 + 131 + while (n && ix < nbv) { 132 + len = min3(n, bvecs[ix].bv_len - skip, max_size); 133 + span += len; 134 + nsegs++; 135 + ix++; 136 + if (span >= max_size || nsegs >= max_segs) 137 + break; 138 + skip = 0; 139 + n -= len; 140 + } 141 + 142 + return min(span, max_size); 143 + } 144 + 145 + /* 146 + * Select the span of an xarray iterator we're going to use. Limit it by both 147 + * maximum size and maximum number of segments. It is assumed that segments 148 + * can be larger than a page in size, provided they're physically contiguous. 149 + * Returns the size of the span in bytes. 
150 + */ 151 + static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offset, 152 + size_t max_size, size_t max_segs) 153 + { 154 + struct folio *folio; 155 + unsigned int nsegs = 0; 156 + loff_t pos = iter->xarray_start + iter->iov_offset; 157 + pgoff_t index = pos / PAGE_SIZE; 158 + size_t span = 0, n = iter->count; 159 + 160 + XA_STATE(xas, iter->xarray, index); 161 + 162 + if (WARN_ON(!iov_iter_is_xarray(iter)) || 163 + WARN_ON(start_offset > n) || 164 + n == 0) 165 + return 0; 166 + max_size = min(max_size, n - start_offset); 167 + 168 + rcu_read_lock(); 169 + xas_for_each(&xas, folio, ULONG_MAX) { 170 + size_t offset, flen, len; 171 + if (xas_retry(&xas, folio)) 172 + continue; 173 + if (WARN_ON(xa_is_value(folio))) 174 + break; 175 + if (WARN_ON(folio_test_hugetlb(folio))) 176 + break; 177 + 178 + flen = folio_size(folio); 179 + offset = offset_in_folio(folio, pos); 180 + len = min(max_size, flen - offset); 181 + span += len; 182 + nsegs++; 183 + if (span >= max_size || nsegs >= max_segs) 184 + break; 185 + } 186 + 187 + rcu_read_unlock(); 188 + return min(span, max_size); 189 + } 190 + 191 + size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, 192 + size_t max_size, size_t max_segs) 193 + { 194 + if (iov_iter_is_bvec(iter)) 195 + return netfs_limit_bvec(iter, start_offset, max_size, max_segs); 196 + if (iov_iter_is_xarray(iter)) 197 + return netfs_limit_xarray(iter, start_offset, max_size, max_segs); 198 + BUG(); 199 + } 200 + EXPORT_SYMBOL(netfs_limit_iter);
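netfs_limit_bvec() above first skips whole segments covered by the start offset, then accumulates a span until it hits either the byte cap or the segment cap. The same accounting in standalone form (hypothetical: segments reduced to an array of plain lengths rather than `struct bio_vec`):

```c
#include <assert.h>
#include <stddef.h>

/* Accumulate a span over segment lengths, capped by both max_size and
 * max_segs -- the shape of netfs_limit_bvec(). */
static size_t limit_span(const size_t *seg, size_t nseg, size_t skip,
			 size_t max_size, size_t max_segs)
{
	size_t span = 0, nsegs = 0, ix = 0, len;

	/* Skip whole segments covered by the start offset. */
	while (ix < nseg && skip >= seg[ix])
		skip -= seg[ix++];

	while (ix < nseg) {
		len = seg[ix] - skip;	/* partial first segment */
		if (len > max_size - span)
			len = max_size - span;
		span += len;
		nsegs++;
		ix++;
		if (span >= max_size || nsegs >= max_segs)
			break;
		skip = 0;	/* only the first segment is partial */
	}
	return span;
}
```

A subrequest length clamped to this span is guaranteed to fit in `max_segs` segments, which is what `subreq->max_nr_segs` is enforcing in netfs_rreq_prepare_read().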
+216
fs/netfs/locking.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * I/O and data path helper functionality. 4 + * 5 + * Borrowed from NFS Copyright (c) 2016 Trond Myklebust 6 + */ 7 + 8 + #include <linux/kernel.h> 9 + #include <linux/netfs.h> 10 + #include "internal.h" 11 + 12 + /* 13 + * inode_dio_wait_interruptible - wait for outstanding DIO requests to finish 14 + * @inode: inode to wait for 15 + * 16 + * Waits for all pending direct I/O requests to finish so that we can 17 + * proceed with a truncate or equivalent operation. 18 + * 19 + * Must be called under a lock that serializes taking new references 20 + * to i_dio_count, usually by inode->i_mutex. 21 + */ 22 + static int inode_dio_wait_interruptible(struct inode *inode) 23 + { 24 + if (!atomic_read(&inode->i_dio_count)) 25 + return 0; 26 + 27 + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); 28 + DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); 29 + 30 + for (;;) { 31 + prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE); 32 + if (!atomic_read(&inode->i_dio_count)) 33 + break; 34 + if (signal_pending(current)) 35 + break; 36 + schedule(); 37 + } 38 + finish_wait(wq, &q.wq_entry); 39 + 40 + return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; 41 + } 42 + 43 + /* Call with exclusively locked inode->i_rwsem */ 44 + static int netfs_block_o_direct(struct netfs_inode *ictx) 45 + { 46 + if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) 47 + return 0; 48 + clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags); 49 + return inode_dio_wait_interruptible(&ictx->inode); 50 + } 51 + 52 + /** 53 + * netfs_start_io_read - declare the file is being used for buffered reads 54 + * @inode: file inode 55 + * 56 + * Declare that a buffered read operation is about to start, and ensure 57 + * that we block all direct I/O. 58 + * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is unset, 59 + * and holds a shared lock on inode->i_rwsem to ensure that the flag 60 + * cannot be changed. 
61 + * In practice, this means that buffered read operations are allowed to 62 + * execute in parallel, thanks to the shared lock, whereas direct I/O 63 + * operations need to wait to grab an exclusive lock in order to set 64 + * NETFS_ICTX_ODIRECT. 65 + * Note that buffered writes and truncates both take a write lock on 66 + * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. 67 + */ 68 + int netfs_start_io_read(struct inode *inode) 69 + __acquires(inode->i_rwsem) 70 + { 71 + struct netfs_inode *ictx = netfs_inode(inode); 72 + 73 + /* Be an optimist! */ 74 + if (down_read_interruptible(&inode->i_rwsem) < 0) 75 + return -ERESTARTSYS; 76 + if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) == 0) 77 + return 0; 78 + up_read(&inode->i_rwsem); 79 + 80 + /* Slow path.... */ 81 + if (down_write_killable(&inode->i_rwsem) < 0) 82 + return -ERESTARTSYS; 83 + if (netfs_block_o_direct(ictx) < 0) { 84 + up_write(&inode->i_rwsem); 85 + return -ERESTARTSYS; 86 + } 87 + downgrade_write(&inode->i_rwsem); 88 + return 0; 89 + } 90 + EXPORT_SYMBOL(netfs_start_io_read); 91 + 92 + /** 93 + * netfs_end_io_read - declare that the buffered read operation is done 94 + * @inode: file inode 95 + * 96 + * Declare that a buffered read operation is done, and release the shared 97 + * lock on inode->i_rwsem. 98 + */ 99 + void netfs_end_io_read(struct inode *inode) 100 + __releases(inode->i_rwsem) 101 + { 102 + up_read(&inode->i_rwsem); 103 + } 104 + EXPORT_SYMBOL(netfs_end_io_read); 105 + 106 + /** 107 + * netfs_start_io_write - declare the file is being used for buffered writes 108 + * @inode: file inode 109 + * 110 + * Declare that a buffered write operation is about to start, and ensure 111 + * that we block all direct I/O. 
112 + */ 113 + int netfs_start_io_write(struct inode *inode) 114 + __acquires(inode->i_rwsem) 115 + { 116 + struct netfs_inode *ictx = netfs_inode(inode); 117 + 118 + if (down_write_killable(&inode->i_rwsem) < 0) 119 + return -ERESTARTSYS; 120 + if (netfs_block_o_direct(ictx) < 0) { 121 + up_write(&inode->i_rwsem); 122 + return -ERESTARTSYS; 123 + } 124 + return 0; 125 + } 126 + EXPORT_SYMBOL(netfs_start_io_write); 127 + 128 + /** 129 + * netfs_end_io_write - declare that the buffered write operation is done 130 + * @inode: file inode 131 + * 132 + * Declare that a buffered write operation is done, and release the 133 + * lock on inode->i_rwsem. 134 + */ 135 + void netfs_end_io_write(struct inode *inode) 136 + __releases(inode->i_rwsem) 137 + { 138 + up_write(&inode->i_rwsem); 139 + } 140 + EXPORT_SYMBOL(netfs_end_io_write); 141 + 142 + /* Call with exclusively locked inode->i_rwsem */ 143 + static int netfs_block_buffered(struct inode *inode) 144 + { 145 + struct netfs_inode *ictx = netfs_inode(inode); 146 + int ret; 147 + 148 + if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) { 149 + set_bit(NETFS_ICTX_ODIRECT, &ictx->flags); 150 + if (inode->i_mapping->nrpages != 0) { 151 + unmap_mapping_range(inode->i_mapping, 0, 0, 0); 152 + ret = filemap_fdatawait(inode->i_mapping); 153 + if (ret < 0) { 154 + clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags); 155 + return ret; 156 + } 157 + } 158 + } 159 + return 0; 160 + } 161 + 162 + /** 163 + * netfs_start_io_direct - declare the file is being used for direct i/o 164 + * @inode: file inode 165 + * 166 + * Declare that a direct I/O operation is about to start, and ensure 167 + * that we block all buffered I/O. 168 + * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is set, 169 + * and holds a shared lock on inode->i_rwsem to ensure that the flag 170 + * cannot be changed. 
171 + * In practice, this means that direct I/O operations are allowed to 172 + * execute in parallel, thanks to the shared lock, whereas buffered I/O 173 + * operations need to wait to grab an exclusive lock in order to clear 174 + * NETFS_ICTX_ODIRECT. 175 + * Note that buffered writes and truncates both take a write lock on 176 + * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. 177 + */ 178 + int netfs_start_io_direct(struct inode *inode) 179 + __acquires(inode->i_rwsem) 180 + { 181 + struct netfs_inode *ictx = netfs_inode(inode); 182 + int ret; 183 + 184 + /* Be an optimist! */ 185 + if (down_read_interruptible(&inode->i_rwsem) < 0) 186 + return -ERESTARTSYS; 187 + if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) != 0) 188 + return 0; 189 + up_read(&inode->i_rwsem); 190 + 191 + /* Slow path.... */ 192 + if (down_write_killable(&inode->i_rwsem) < 0) 193 + return -ERESTARTSYS; 194 + ret = netfs_block_buffered(inode); 195 + if (ret < 0) { 196 + up_write(&inode->i_rwsem); 197 + return ret; 198 + } 199 + downgrade_write(&inode->i_rwsem); 200 + return 0; 201 + } 202 + EXPORT_SYMBOL(netfs_start_io_direct); 203 + 204 + /** 205 + * netfs_end_io_direct - declare that the direct i/o operation is done 206 + * @inode: file inode 207 + * 208 + * Declare that a direct I/O operation is done, and release the shared 209 + * lock on inode->i_rwsem. 210 + */ 211 + void netfs_end_io_direct(struct inode *inode) 212 + __releases(inode->i_rwsem) 213 + { 214 + up_read(&inode->i_rwsem); 215 + } 216 + EXPORT_SYMBOL(netfs_end_io_direct);
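The locking above hinges on the NETFS_ICTX_ODIRECT flag: buffered I/O must see it clear, direct I/O must see it set, and only the side that finds the flag "wrong" has to take the exclusive slow path to flip it (draining the other side's I/O as it does so). A single-threaded model of just the flag decisions (hypothetical struct; the rwsem, DIO draining, and page-cache flushing are deliberately omitted):

```c
#include <assert.h>
#include <stdbool.h>

/* Model of the NETFS_ICTX_ODIRECT handshake used by netfs_start_io_read()
 * and netfs_start_io_direct().  Each starter returns true if the fast
 * (shared-lock) path would have sufficed, false if it had to take the
 * slow path and flip the flag. */
struct ictx_model {
	bool odirect;		/* NETFS_ICTX_ODIRECT */
};

static bool model_start_buffered(struct ictx_model *ictx)
{
	if (!ictx->odirect)
		return true;	/* fast path: flag already clear */
	ictx->odirect = false;	/* slow path: netfs_block_o_direct() */
	return false;
}

static bool model_start_direct(struct ictx_model *ictx)
{
	if (ictx->odirect)
		return true;	/* fast path: flag already set */
	ictx->odirect = true;	/* slow path: netfs_block_buffered() */
	return false;
}
```

Once one side has paid the slow-path cost, repeated operations of the same kind run concurrently under the shared lock until the other kind of I/O shows up, which is the NFS-derived behaviour the pull request describes.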
+109
fs/netfs/main.c
··· 7 7 8 8 #include <linux/module.h> 9 9 #include <linux/export.h> 10 + #include <linux/proc_fs.h> 11 + #include <linux/seq_file.h> 10 12 #include "internal.h" 11 13 #define CREATE_TRACE_POINTS 12 14 #include <trace/events/netfs.h> ··· 17 15 MODULE_AUTHOR("Red Hat, Inc."); 18 16 MODULE_LICENSE("GPL"); 19 17 18 + EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); 19 + 20 20 unsigned netfs_debug; 21 21 module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); 22 22 MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); 23 + 24 + #ifdef CONFIG_PROC_FS 25 + LIST_HEAD(netfs_io_requests); 26 + DEFINE_SPINLOCK(netfs_proc_lock); 27 + 28 + static const char *netfs_origins[nr__netfs_io_origin] = { 29 + [NETFS_READAHEAD] = "RA", 30 + [NETFS_READPAGE] = "RP", 31 + [NETFS_READ_FOR_WRITE] = "RW", 32 + [NETFS_WRITEBACK] = "WB", 33 + [NETFS_WRITETHROUGH] = "WT", 34 + [NETFS_LAUNDER_WRITE] = "LW", 35 + [NETFS_UNBUFFERED_WRITE] = "UW", 36 + [NETFS_DIO_READ] = "DR", 37 + [NETFS_DIO_WRITE] = "DW", 38 + }; 39 + 40 + /* 41 + * Generate a list of I/O requests in /proc/fs/netfs/requests 42 + */ 43 + static int netfs_requests_seq_show(struct seq_file *m, void *v) 44 + { 45 + struct netfs_io_request *rreq; 46 + 47 + if (v == &netfs_io_requests) { 48 + seq_puts(m, 49 + "REQUEST OR REF FL ERR OPS COVERAGE\n" 50 + "======== == === == ==== === =========\n" 51 + ); 52 + return 0; 53 + } 54 + 55 + rreq = list_entry(v, struct netfs_io_request, proc_link); 56 + seq_printf(m, 57 + "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx", 58 + rreq->debug_id, 59 + netfs_origins[rreq->origin], 60 + refcount_read(&rreq->ref), 61 + rreq->flags, 62 + rreq->error, 63 + atomic_read(&rreq->nr_outstanding), 64 + rreq->start, rreq->submitted, rreq->len); 65 + seq_putc(m, '\n'); 66 + return 0; 67 + } 68 + 69 + static void *netfs_requests_seq_start(struct seq_file *m, loff_t *_pos) 70 + __acquires(rcu) 71 + { 72 + rcu_read_lock(); 73 + return seq_list_start_head(&netfs_io_requests, *_pos); 74 + } 75 + 76 + static void 
*netfs_requests_seq_next(struct seq_file *m, void *v, loff_t *_pos) 77 + { 78 + return seq_list_next(v, &netfs_io_requests, _pos); 79 + } 80 + 81 + static void netfs_requests_seq_stop(struct seq_file *m, void *v) 82 + __releases(rcu) 83 + { 84 + rcu_read_unlock(); 85 + } 86 + 87 + static const struct seq_operations netfs_requests_seq_ops = { 88 + .start = netfs_requests_seq_start, 89 + .next = netfs_requests_seq_next, 90 + .stop = netfs_requests_seq_stop, 91 + .show = netfs_requests_seq_show, 92 + }; 93 + #endif /* CONFIG_PROC_FS */ 94 + 95 + static int __init netfs_init(void) 96 + { 97 + int ret = -ENOMEM; 98 + 99 + if (!proc_mkdir("fs/netfs", NULL)) 100 + goto error; 101 + if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL, 102 + &netfs_requests_seq_ops)) 103 + goto error_proc; 104 + #ifdef CONFIG_FSCACHE_STATS 105 + if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, 106 + netfs_stats_show)) 107 + goto error_proc; 108 + #endif 109 + 110 + ret = fscache_init(); 111 + if (ret < 0) 112 + goto error_proc; 113 + return 0; 114 + 115 + error_proc: 116 + remove_proc_entry("fs/netfs", NULL); 117 + error: 118 + return ret; 119 + } 120 + fs_initcall(netfs_init); 121 + 122 + static void __exit netfs_exit(void) 123 + { 124 + fscache_exit(); 125 + remove_proc_entry("fs/netfs", NULL); 126 + } 127 + module_exit(netfs_exit);
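The seq_printf() format in netfs_requests_seq_show() above produces the fixed-width rows under the "REQUEST OR REF FL ERR OPS COVERAGE" header. The same format string can be exercised in userspace to see the column layout (values below are made up; only the format string is taken from the patch):

```c
#include <stdio.h>

/* Reproduce the row format of netfs_requests_seq_show() with snprintf()
 * so the /proc/fs/netfs/requests column layout can be inspected outside
 * the kernel.  Argument types match the kernel format specifiers. */
static int format_row(char *buf, size_t size, unsigned int debug_id,
		      const char *origin, int ref, unsigned long flags,
		      int error, int ops, unsigned long long start,
		      size_t submitted, size_t len)
{
	return snprintf(buf, size, "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx",
			debug_id, origin, ref, flags, error, ops,
			start, submitted, len);
}
```

For example, a readahead request with debug id 0x1234, two refs, flags 0x21, one outstanding op, and 0/0x1000 bytes submitted starting at 0x100 renders as `00001234 RA   2 21    0   1 @0100 0/1000`.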
+260
fs/netfs/misc.c
···
+// SPDX-License-Identifier: GPL-2.0-only
+/* Miscellaneous routines.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/swap.h>
+#include "internal.h"
+
+/*
+ * Attach a folio to the buffer and maybe set marks on it to say that we need
+ * to put the folio later and twiddle the pagecache flags.
+ */
+int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
+			    struct folio *folio, unsigned int flags,
+			    gfp_t gfp_mask)
+{
+	XA_STATE_ORDER(xas, xa, index, folio_order(folio));
+
+retry:
+	xas_lock(&xas);
+	for (;;) {
+		xas_store(&xas, folio);
+		if (!xas_error(&xas))
+			break;
+		xas_unlock(&xas);
+		if (!xas_nomem(&xas, gfp_mask))
+			return xas_error(&xas);
+		goto retry;
+	}
+
+	if (flags & NETFS_FLAG_PUT_MARK)
+		xas_set_mark(&xas, NETFS_BUF_PUT_MARK);
+	if (flags & NETFS_FLAG_PAGECACHE_MARK)
+		xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK);
+	xas_unlock(&xas);
+	return xas_error(&xas);
+}
+
+/*
+ * Create the specified range of folios in the buffer attached to the read
+ * request.  The folios are marked with NETFS_BUF_PUT_MARK so that we know that
+ * these need freeing later.
+ */
+int netfs_add_folios_to_buffer(struct xarray *buffer,
+			       struct address_space *mapping,
+			       pgoff_t index, pgoff_t to, gfp_t gfp_mask)
+{
+	struct folio *folio;
+	int ret;
+
+	if (to + 1 == index) /* Page range is inclusive */
+		return 0;
+
+	do {
+		/* TODO: Figure out what order folio can be allocated here */
+		folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0);
+		if (!folio)
+			return -ENOMEM;
+		folio->index = index;
+		ret = netfs_xa_store_and_mark(buffer, index, folio,
+					      NETFS_FLAG_PUT_MARK, gfp_mask);
+		if (ret < 0) {
+			folio_put(folio);
+			return ret;
+		}
+
+		index += folio_nr_pages(folio);
+	} while (index <= to && index != 0);
+
+	return 0;
+}
+
+/*
+ * Clear an xarray buffer, putting a ref on the folios that have
+ * NETFS_BUF_PUT_MARK set.
+ */
+void netfs_clear_buffer(struct xarray *buffer)
+{
+	struct folio *folio;
+	XA_STATE(xas, buffer, 0);
+
+	rcu_read_lock();
+	xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) {
+		folio_put(folio);
+	}
+	rcu_read_unlock();
+	xa_destroy(buffer);
+}
+
+/**
+ * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback
+ * @mapping: The mapping the folio belongs to.
+ * @folio: The folio being dirtied.
+ *
+ * Set the dirty flag on a folio and pin an in-use cache object in memory so
+ * that writeback can later write to it.  This is intended to be called from
+ * the filesystem's ->dirty_folio() method.
+ *
+ * Return: true if the dirty flag was set on the folio, false otherwise.
+ */
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+	struct inode *inode = mapping->host;
+	struct netfs_inode *ictx = netfs_inode(inode);
+	struct fscache_cookie *cookie = netfs_i_cookie(ictx);
+	bool need_use = false;
+
+	_enter("");
+
+	if (!filemap_dirty_folio(mapping, folio))
+		return false;
+	if (!fscache_cookie_valid(cookie))
+		return true;
+
+	if (!(inode->i_state & I_PINNING_NETFS_WB)) {
+		spin_lock(&inode->i_lock);
+		if (!(inode->i_state & I_PINNING_NETFS_WB)) {
+			inode->i_state |= I_PINNING_NETFS_WB;
+			need_use = true;
+		}
+		spin_unlock(&inode->i_lock);
+
+		if (need_use)
+			fscache_use_cookie(cookie, true);
+	}
+	return true;
+}
+EXPORT_SYMBOL(netfs_dirty_folio);
+
+/**
+ * netfs_unpin_writeback - Unpin writeback resources
+ * @inode: The inode on which the cookie resides
+ * @wbc: The writeback control
+ *
+ * Unpin the writeback resources pinned by netfs_dirty_folio().  This is
+ * intended to be called as/by the netfs's ->write_inode() method.
+ */
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc)
+{
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+
+	if (wbc->unpinned_netfs_wb)
+		fscache_unuse_cookie(cookie, NULL, NULL);
+	return 0;
+}
+EXPORT_SYMBOL(netfs_unpin_writeback);
+
+/**
+ * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode
+ * @inode: The inode to clean up
+ * @aux: Auxiliary data to apply to the inode
+ *
+ * Clear any writeback resources held by an inode when the inode is evicted.
+ * This must be called before clear_inode() is called.
+ */
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux)
+{
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+
+	if (inode->i_state & I_PINNING_NETFS_WB) {
+		loff_t i_size = i_size_read(inode);
+		fscache_unuse_cookie(cookie, aux, &i_size);
+	}
+}
+EXPORT_SYMBOL(netfs_clear_inode_writeback);
+
+/**
+ * netfs_invalidate_folio - Invalidate or partially invalidate a folio
+ * @folio: Folio proposed for release
+ * @offset: Offset of the invalidated region
+ * @length: Length of the invalidated region
+ *
+ * Invalidate part or all of a folio for a network filesystem.  The folio will
+ * be removed afterwards if the invalidated region covers the entire folio.
+ */
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
+{
+	struct netfs_folio *finfo = NULL;
+	size_t flen = folio_size(folio);
+
+	_enter("{%lx},%zx,%zx", folio_index(folio), offset, length);
+
+	folio_wait_fscache(folio);
+
+	if (!folio_test_private(folio))
+		return;
+
+	finfo = netfs_folio_info(folio);
+
+	if (offset == 0 && length >= flen)
+		goto erase_completely;
+
+	if (finfo) {
+		/* We have a partially uptodate page from a streaming write. */
+		unsigned int fstart = finfo->dirty_offset;
+		unsigned int fend = fstart + finfo->dirty_len;
+		unsigned int end = offset + length;
+
+		if (offset >= fend)
+			return;
+		if (end <= fstart)
+			return;
+		if (offset <= fstart && end >= fend)
+			goto erase_completely;
+		if (offset <= fstart && end > fstart)
+			goto reduce_len;
+		if (offset > fstart && end >= fend)
+			goto move_start;
+		/* A partial write was split.  The caller has already zeroed
+		 * it, so just absorb the hole.
+		 */
+	}
+	return;
+
+erase_completely:
+	netfs_put_group(netfs_folio_group(folio));
+	folio_detach_private(folio);
+	folio_clear_uptodate(folio);
+	kfree(finfo);
+	return;
+reduce_len:
+	finfo->dirty_len = offset + length - finfo->dirty_offset;
+	return;
+move_start:
+	finfo->dirty_len -= offset - finfo->dirty_offset;
+	finfo->dirty_offset = offset;
+}
+EXPORT_SYMBOL(netfs_invalidate_folio);
+
+/**
+ * netfs_release_folio - Try to release a folio
+ * @folio: Folio proposed for release
+ * @gfp: Flags qualifying the release
+ *
+ * Request release of a folio and clean up its private state if it's not busy.
+ * Returns true if the folio can now be released, false if not
+ */
+bool netfs_release_folio(struct folio *folio, gfp_t gfp)
+{
+	struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
+	unsigned long long end;
+
+	end = folio_pos(folio) + folio_size(folio);
+	if (end > ctx->zero_point)
+		ctx->zero_point = end;
+
+	if (folio_test_private(folio))
+		return false;
+	if (folio_test_fscache(folio)) {
+		if (current_is_kswapd() || !(gfp & __GFP_FS))
+			return false;
+		folio_wait_fscache(folio);
+	}
+
+	fscache_note_page_release(netfs_i_cookie(ctx));
+	return true;
+}
+EXPORT_SYMBOL(netfs_release_folio);
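The interesting part of netfs_invalidate_folio() is the interval arithmetic against a streaming write's dirty region (`dirty_offset`/`dirty_len`): a disjoint invalidation is ignored, full coverage erases the region, and an overlap at either end trims it. A simplified userspace model of that case analysis (the trimming here keeps the non-invalidated remainder; it is my own simplification and skips the folio-private and write-group bookkeeping the kernel also does):

```c
#include <assert.h>

/* Dirty-region bookkeeping, mirroring the fields of struct netfs_folio. */
struct dirty_region {
	unsigned int dirty_offset;
	unsigned int dirty_len;
};

enum inval_action { KEEP, ERASE, TRIMMED };

/* Classify an invalidation [offset, offset+length) against the dirty
 * region [dirty_offset, dirty_offset+dirty_len) and trim accordingly. */
static enum inval_action invalidate_region(struct dirty_region *r,
					   unsigned int offset,
					   unsigned int length)
{
	unsigned int fstart = r->dirty_offset;
	unsigned int fend = fstart + r->dirty_len;
	unsigned int end = offset + length;

	if (offset >= fend || end <= fstart)
		return KEEP;			/* disjoint */
	if (offset <= fstart && end >= fend)
		return ERASE;			/* fully covered */
	if (offset <= fstart) {
		/* Front clipped: keep [end, fend). */
		r->dirty_offset = end;
		r->dirty_len = fend - end;
		return TRIMMED;
	}
	if (end >= fend) {
		/* Tail clipped: keep [fstart, offset). */
		r->dirty_len = offset - fstart;
		return TRIMMED;
	}
	/* Hole punched in the middle: the caller has already zeroed it,
	 * so just absorb it, as the kernel comment says. */
	return KEEP;
}
```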
+46 -13
fs/netfs/objects.c
···
 	struct inode *inode = file ? file_inode(file) : mapping->host;
 	struct netfs_inode *ctx = netfs_inode(inode);
 	struct netfs_io_request *rreq;
+	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
+			      origin == NETFS_DIO_READ ||
+			      origin == NETFS_DIO_WRITE);
+	bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
 	int ret;
 
-	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
+	rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
+		       GFP_KERNEL);
 	if (!rreq)
 		return ERR_PTR(-ENOMEM);
 
 	rreq->start	= start;
 	rreq->len	= len;
+	rreq->upper_len	= len;
 	rreq->origin	= origin;
 	rreq->netfs_ops	= ctx->ops;
 	rreq->mapping	= mapping;
···
 	rreq->i_size	= i_size_read(inode);
 	rreq->debug_id	= atomic_inc_return(&debug_ids);
 	INIT_LIST_HEAD(&rreq->subrequests);
+	INIT_WORK(&rreq->work, NULL);
 	refcount_set(&rreq->ref, 1);
+
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+	if (cached)
+		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
+	if (file && file->f_flags & O_NONBLOCK)
+		__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
 	if (rreq->netfs_ops->init_request) {
 		ret = rreq->netfs_ops->init_request(rreq, file);
 		if (ret < 0) {
···
 		}
 	}
 
+	trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
+	netfs_proc_add_rreq(rreq);
 	netfs_stat(&netfs_n_rh_rreq);
 	return rreq;
 }
···
 {
 	struct netfs_io_request *rreq =
 		container_of(work, struct netfs_io_request, work);
+	unsigned int i;
 
 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+	netfs_proc_del_rreq(rreq);
 	netfs_clear_subrequests(rreq, false);
 	if (rreq->netfs_ops->free_request)
 		rreq->netfs_ops->free_request(rreq);
 	if (rreq->cache_resources.ops)
 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
-	kfree(rreq);
+
+	if (rreq->direct_bv) {
+		for (i = 0; i < rreq->direct_bv_count; i++) {
+			if (rreq->direct_bv[i].bv_page) {
+				if (rreq->direct_bv_unpin)
+					unpin_user_page(rreq->direct_bv[i].bv_page);
+			}
+		}
+		kvfree(rreq->direct_bv);
+	}
+	kfree_rcu(rreq, rcu);
 	netfs_stat_d(&netfs_n_rh_rreq);
 }
 
 void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
 		       enum netfs_rreq_ref_trace what)
 {
-	unsigned int debug_id = rreq->debug_id;
+	unsigned int debug_id;
 	bool dead;
 	int r;
 
-	dead = __refcount_dec_and_test(&rreq->ref, &r);
-	trace_netfs_rreq_ref(debug_id, r - 1, what);
-	if (dead) {
-		if (was_async) {
-			rreq->work.func = netfs_free_request;
-			if (!queue_work(system_unbound_wq, &rreq->work))
-				BUG();
-		} else {
-			netfs_free_request(&rreq->work);
+	if (rreq) {
+		debug_id = rreq->debug_id;
+		dead = __refcount_dec_and_test(&rreq->ref, &r);
+		trace_netfs_rreq_ref(debug_id, r - 1, what);
+		if (dead) {
+			if (was_async) {
+				rreq->work.func = netfs_free_request;
+				if (!queue_work(system_unbound_wq, &rreq->work))
+					BUG();
+			} else {
+				netfs_free_request(&rreq->work);
+			}
 		}
 	}
 }
···
 {
 	struct netfs_io_subrequest *subreq;
 
-	subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
+	subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?:
+			 sizeof(struct netfs_io_subrequest),
+			 GFP_KERNEL);
 	if (subreq) {
+		INIT_WORK(&subreq->work, NULL);
 		INIT_LIST_HEAD(&subreq->rreq_link);
 		refcount_set(&subreq->ref, 2);
 		subreq->rreq = rreq;
···
 	struct netfs_io_request *rreq = subreq->rreq;
 
 	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
+	if (rreq->netfs_ops->free_subrequest)
+		rreq->netfs_ops->free_subrequest(subreq);
 	kfree(subreq);
 	netfs_stat_d(&netfs_n_rh_sreq);
 	netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
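The allocation change in objects.c uses the GNU `a ?: b` ("elvis") extension: if the filesystem advertises an `io_request_size` covering its own larger request structure, that size is used, and 0 falls back to the bare library struct. A userspace sketch of the idiom (the struct layouts here are invented for illustration):

```c
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Stand-ins for the netfs structures; the embedding pattern matches the
 * diff, the field contents are made up. */
struct base_req { int debug_id; };
struct fs_req   { struct base_req base; char fs_private[64]; };

struct req_ops { size_t io_request_size; /* 0 = just the base struct */ };

/* GCC/clang's a ?: b evaluates a once and uses it unless it is zero, so
 * a filesystem can embed struct base_req at the front of a larger
 * request simply by advertising its full size. */
static struct base_req *alloc_req(const struct req_ops *ops)
{
	return calloc(1, ops->io_request_size ?: sizeof(struct base_req));
}
```

This is the same trick that lets afs/9p/cifs wrap `struct netfs_io_request` and `struct netfs_io_subrequest` without a second allocation.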
+478
fs/netfs/output.c
···
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/**
+ * netfs_create_write_request - Create a write operation.
+ * @wreq: The write request this is storing from.
+ * @dest: The destination type
+ * @start: Start of the region this write will modify
+ * @len: Length of the modification
+ * @worker: The worker function to handle the write(s)
+ *
+ * Allocate a write operation, set it up and add it to the list on a write
+ * request.
+ */
+struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
+						       enum netfs_io_source dest,
+						       loff_t start, size_t len,
+						       work_func_t worker)
+{
+	struct netfs_io_subrequest *subreq;
+
+	subreq = netfs_alloc_subrequest(wreq);
+	if (subreq) {
+		INIT_WORK(&subreq->work, worker);
+		subreq->source	= dest;
+		subreq->start	= start;
+		subreq->len	= len;
+		subreq->debug_index = wreq->subreq_counter++;
+
+		switch (subreq->source) {
+		case NETFS_UPLOAD_TO_SERVER:
+			netfs_stat(&netfs_n_wh_upload);
+			break;
+		case NETFS_WRITE_TO_CACHE:
+			netfs_stat(&netfs_n_wh_write);
+			break;
+		default:
+			BUG();
+		}
+
+		subreq->io_iter = wreq->io_iter;
+		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
+		iov_iter_truncate(&subreq->io_iter, subreq->len);
+
+		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+				     refcount_read(&subreq->ref),
+				     netfs_sreq_trace_new);
+		atomic_inc(&wreq->nr_outstanding);
+		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
+		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+	}
+
+	return subreq;
+}
+EXPORT_SYMBOL(netfs_create_write_request);
+
+/*
+ * Process a completed write request once all the component operations have
+ * been completed.
+ */
+static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
+{
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	size_t transferred = 0;
+
+	_enter("R=%x[]", wreq->debug_id);
+
+	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (subreq->error || subreq->transferred == 0)
+			break;
+		transferred += subreq->transferred;
+		if (subreq->transferred < subreq->len)
+			break;
+	}
+	wreq->transferred = transferred;
+
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (!subreq->error)
+			continue;
+		switch (subreq->source) {
+		case NETFS_UPLOAD_TO_SERVER:
+			/* Depending on the type of failure, this may prevent
+			 * writeback completion unless we're in disconnected
+			 * mode.
+			 */
+			if (!wreq->error)
+				wreq->error = subreq->error;
+			break;
+
+		case NETFS_WRITE_TO_CACHE:
+			/* Failure doesn't prevent writeback completion unless
+			 * we're in disconnected mode.
+			 */
+			if (subreq->error != -ENOBUFS)
+				ctx->ops->invalidate_cache(wreq);
+			break;
+
+		default:
+			WARN_ON_ONCE(1);
+			if (!wreq->error)
+				wreq->error = -EIO;
+			return;
+		}
+	}
+
+	wreq->cleanup(wreq);
+
+	if (wreq->origin == NETFS_DIO_WRITE &&
+	    wreq->mapping->nrpages) {
+		pgoff_t first = wreq->start >> PAGE_SHIFT;
+		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+		invalidate_inode_pages2_range(wreq->mapping, first, last);
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_end(wreq->inode);
+
+	_debug("finished");
+	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
+	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
+
+	if (wreq->iocb) {
+		wreq->iocb->ki_pos += transferred;
+		if (wreq->iocb->ki_complete)
+			wreq->iocb->ki_complete(
+				wreq->iocb, wreq->error ? wreq->error : transferred);
+	}
+
+	netfs_clear_subrequests(wreq, was_async);
+	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
+}
+
+/*
+ * Deal with the completion of writing the data to the cache.
+ */
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+				       bool was_async)
+{
+	struct netfs_io_subrequest *subreq = _op;
+	struct netfs_io_request *wreq = subreq->rreq;
+	unsigned int u;
+
+	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+
+	switch (subreq->source) {
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload_done);
+		break;
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write_done);
+		break;
+	case NETFS_INVALID_WRITE:
+		break;
+	default:
+		BUG();
+	}
+
+	if (IS_ERR_VALUE(transferred_or_error)) {
+		subreq->error = transferred_or_error;
+		trace_netfs_failure(wreq, subreq, transferred_or_error,
+				    netfs_fail_write);
+		goto failed;
+	}
+
+	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
+		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
+		 wreq->debug_id, subreq->debug_index,
+		 transferred_or_error, subreq->len, subreq->transferred))
+		transferred_or_error = subreq->len - subreq->transferred;
+
+	subreq->error = 0;
+	subreq->transferred += transferred_or_error;
+
+	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
+			wreq->debug_id, subreq->debug_index,
+			iov_iter_count(&subreq->io_iter), subreq->len,
+			subreq->transferred, subreq->io_iter.iter_type);
+
+	if (subreq->transferred < subreq->len)
+		goto incomplete;
+
+	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+out:
+	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
+	u = atomic_dec_return(&wreq->nr_outstanding);
+	if (u == 0)
+		netfs_write_terminated(wreq, was_async);
+	else if (u == 1)
+		wake_up_var(&wreq->nr_outstanding);
+
+	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+	return;
+
+incomplete:
+	if (transferred_or_error == 0) {
+		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
+			subreq->error = -ENODATA;
+			goto failed;
+		}
+	} else {
+		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+	}
+
+	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
+	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+	goto out;
+
+failed:
+	switch (subreq->source) {
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write_failed);
+		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+		break;
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload_failed);
+		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
+		wreq->error = subreq->error;
+		break;
+	default:
+		break;
+	}
+	goto out;
+}
+EXPORT_SYMBOL(netfs_write_subrequest_terminated);
+
+static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+
+	cres->ops->write(cres, subreq->start, &subreq->io_iter,
+			 netfs_write_subrequest_terminated, subreq);
+}
+
+static void netfs_write_to_cache_op_worker(struct work_struct *work)
+{
+	struct netfs_io_subrequest *subreq =
+		container_of(work, struct netfs_io_subrequest, work);
+
+	netfs_write_to_cache_op(subreq);
+}
+
+/**
+ * netfs_queue_write_request - Queue a write request for attention
+ * @subreq: The write request to be queued
+ *
+ * Queue the specified write request for processing by a worker thread.  We
+ * pass the caller's ref on the request to the worker thread.
+ */
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
+{
+	if (!queue_work(system_unbound_wq, &subreq->work))
+		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
+}
+EXPORT_SYMBOL(netfs_queue_write_request);
+
+/*
+ * Set up a op for writing to the cache.
+ */
+static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
+{
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
+	loff_t start = wreq->start;
+	size_t len = wreq->len;
+	int ret;
+
+	if (!fscache_cookie_enabled(cookie)) {
+		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
+		return;
+	}
+
+	_debug("write to cache");
+	ret = fscache_begin_write_operation(cres, cookie);
+	if (ret < 0)
+		return;
+
+	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
+				       i_size_read(wreq->inode), true);
+	if (ret < 0)
+		return;
+
+	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
+					    netfs_write_to_cache_op_worker);
+	if (!subreq)
+		return;
+
+	netfs_write_to_cache_op(subreq);
+}
+
+/*
+ * Begin the process of writing out a chunk of data.
+ *
+ * We are given a write request that holds a series of dirty regions and
+ * (partially) covers a sequence of folios, all of which are present.  The
+ * pages must have been marked as writeback as appropriate.
+ *
+ * We need to perform the following steps:
+ *
+ * (1) If encrypting, create an output buffer and encrypt each block of the
+ *     data into it, otherwise the output buffer will point to the original
+ *     folios.
+ *
+ * (2) If the data is to be cached, set up a write op for the entire output
+ *     buffer to the cache, if the cache wants to accept it.
+ *
+ * (3) If the data is to be uploaded (ie. not merely cached):
+ *
+ *     (a) If the data is to be compressed, create a compression buffer and
+ *         compress the data into it.
+ *
+ *     (b) For each destination we want to upload to, set up write ops to write
+ *         to that destination.  We may need multiple writes if the data is not
+ *         contiguous or the span exceeds wsize for a server.
+ */
+int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
+		      enum netfs_write_trace what)
+{
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+
+	_enter("R=%x %llx-%llx f=%lx",
+	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
+	       wreq->flags);
+
+	trace_netfs_write(wreq, what);
+	if (wreq->len == 0 || wreq->iter.count == 0) {
+		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
+		return -EIO;
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_begin(wreq->inode);
+
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+
+	/* Start the encryption/compression going.  We can do that in the
+	 * background whilst we generate a list of write ops that we want to
+	 * perform.
+	 */
+	// TODO: Encrypt or compress the region as appropriate
+
+	/* We need to write all of the region to the cache */
+	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+		netfs_set_up_write_to_cache(wreq);
+
+	/* However, we don't necessarily write all of the region to the server.
+	 * Caching of reads is being managed this way also.
+	 */
+	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (!may_wait)
+		return -EIOCBQUEUED;
+
+	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+		    TASK_UNINTERRUPTIBLE);
+	return wreq->error;
+}
+
+/*
+ * Begin a write operation for writing through the pagecache.
+ */
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
+{
+	struct netfs_io_request *wreq;
+	struct file *file = iocb->ki_filp;
+
+	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
+				   NETFS_WRITETHROUGH);
+	if (IS_ERR(wreq))
+		return wreq;
+
+	trace_netfs_write(wreq, netfs_write_trace_writethrough);
+
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+	return wreq;
+}
+
+static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
+{
+	struct netfs_inode *ictx = netfs_inode(wreq->inode);
+	unsigned long long start;
+	size_t len;
+
+	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		return;
+
+	start = wreq->start + wreq->submitted;
+	len = wreq->iter.count - wreq->submitted;
+	if (!final) {
+		len /= wreq->wsize; /* Round to number of maximum packets */
+		len *= wreq->wsize;
+	}
+
+	ictx->ops->create_write_requests(wreq, start, len);
+	wreq->submitted += len;
+}
+
+/*
+ * Advance the state of the write operation used when writing through the
+ * pagecache.  Data has been copied into the pagecache that we need to append
+ * to the request.  If we've added more than wsize then we need to create a new
+ * subrequest.
+ */
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
+{
+	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
+
+	wreq->iter.count += copied;
+	wreq->io_iter.count += copied;
+	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
+		netfs_submit_writethrough(wreq, false);
+
+	return wreq->error;
+}
+
+/*
+ * End a write operation used when writing through the pagecache.
+ */
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
+{
+	int ret = -EIOCBQUEUED;
+
+	_enter("ic=%zu sb=%zu ws=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize);
+
+	if (wreq->submitted < wreq->io_iter.count)
+		netfs_submit_writethrough(wreq, true);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (is_sync_kiocb(iocb)) {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+			    TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	}
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	return ret;
+}
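The write-through path in output.c batches pagecache copies into server writes of at most `wsize`: while more data may still arrive, only whole `wsize`-sized chunks are submitted, and the final flush sends whatever remains. The length clamp from netfs_submit_writethrough() reduces to this arithmetic (a standalone model, not the kernel function itself):

```c
#include <assert.h>
#include <stddef.h>

/* Mirror of the length clamp in netfs_submit_writethrough(): given how
 * much data has been buffered and how much already submitted, decide how
 * much to hand to create_write_requests() next. */
static size_t writethrough_len(size_t buffered, size_t submitted,
			       size_t wsize, int final)
{
	size_t len = buffered - submitted;

	if (!final)
		len = len / wsize * wsize; /* round down to whole packets */
	return len;
}
```

With a 4096-byte `wsize` and 10000 bytes copied in, the non-final submissions cover 8192 bytes and the final flush picks up the 1808-byte tail.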
+30 -12
fs/netfs/stats.c
···
 #include <linux/seq_file.h>
 #include "internal.h"
 
+atomic_t netfs_n_rh_dio_read;
+atomic_t netfs_n_rh_dio_write;
 atomic_t netfs_n_rh_readahead;
 atomic_t netfs_n_rh_readpage;
 atomic_t netfs_n_rh_rreq;
···
 atomic_t netfs_n_rh_write_done;
 atomic_t netfs_n_rh_write_failed;
 atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_wh_wstream_conflict;
+atomic_t netfs_n_wh_upload;
+atomic_t netfs_n_wh_upload_done;
+atomic_t netfs_n_wh_upload_failed;
+atomic_t netfs_n_wh_write;
+atomic_t netfs_n_wh_write_done;
+atomic_t netfs_n_wh_write_failed;
 
-void netfs_stats_show(struct seq_file *m)
+int netfs_stats_show(struct seq_file *m, void *v)
 {
-	seq_printf(m, "RdHelp : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n",
+	seq_printf(m, "Netfs  : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n",
+		   atomic_read(&netfs_n_rh_dio_read),
+		   atomic_read(&netfs_n_rh_dio_write),
 		   atomic_read(&netfs_n_rh_readahead),
 		   atomic_read(&netfs_n_rh_readpage),
 		   atomic_read(&netfs_n_rh_write_begin),
-		   atomic_read(&netfs_n_rh_write_zskip),
-		   atomic_read(&netfs_n_rh_rreq),
-		   atomic_read(&netfs_n_rh_sreq));
-	seq_printf(m, "RdHelp : ZR=%u sh=%u sk=%u\n",
+		   atomic_read(&netfs_n_rh_write_zskip));
+	seq_printf(m, "Netfs  : ZR=%u sh=%u sk=%u\n",
 		   atomic_read(&netfs_n_rh_zero),
 		   atomic_read(&netfs_n_rh_short_read),
 		   atomic_read(&netfs_n_rh_write_zskip));
-	seq_printf(m, "RdHelp : DL=%u ds=%u df=%u di=%u\n",
+	seq_printf(m, "Netfs  : DL=%u ds=%u df=%u di=%u\n",
 		   atomic_read(&netfs_n_rh_download),
 		   atomic_read(&netfs_n_rh_download_done),
 		   atomic_read(&netfs_n_rh_download_failed),
 		   atomic_read(&netfs_n_rh_download_instead));
-	seq_printf(m, "RdHelp : RD=%u rs=%u rf=%u\n",
+	seq_printf(m, "Netfs  : RD=%u rs=%u rf=%u\n",
 		   atomic_read(&netfs_n_rh_read),
 		   atomic_read(&netfs_n_rh_read_done),
 		   atomic_read(&netfs_n_rh_read_failed));
-	seq_printf(m, "RdHelp : WR=%u ws=%u wf=%u\n",
-		   atomic_read(&netfs_n_rh_write),
-		   atomic_read(&netfs_n_rh_write_done),
-		   atomic_read(&netfs_n_rh_write_failed));
+	seq_printf(m, "Netfs  : UL=%u us=%u uf=%u\n",
+		   atomic_read(&netfs_n_wh_upload),
+		   atomic_read(&netfs_n_wh_upload_done),
+		   atomic_read(&netfs_n_wh_upload_failed));
+	seq_printf(m, "Netfs  : WR=%u ws=%u wf=%u\n",
+		   atomic_read(&netfs_n_wh_write),
+		   atomic_read(&netfs_n_wh_write_done),
+		   atomic_read(&netfs_n_wh_write_failed));
+	seq_printf(m, "Netfs  : rr=%u sr=%u wsc=%u\n",
+		   atomic_read(&netfs_n_rh_rreq),
+		   atomic_read(&netfs_n_rh_sreq),
+		   atomic_read(&netfs_n_wh_wstream_conflict));
+	return fscache_stats_show(m);
 }
 EXPORT_SYMBOL(netfs_stats_show);
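The stats file follows a simple pattern: lock-free `atomic_t` counters bumped on each event, snapshotted one by one when the `/proc` file is read. A userspace analogue using C11 `stdatomic.h` in place of the kernel's `atomic_t` (counter names abbreviated from the diff; the formatting helper is mine):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

/* Userspace analogue of the netfs stat counters: increments may race
 * across threads, so relaxed atomic adds are enough; the show routine
 * just snapshots each counter independently. */
static atomic_uint n_upload, n_upload_done;

static void stat_inc(atomic_uint *v)
{
	atomic_fetch_add_explicit(v, 1, memory_order_relaxed);
}

/* Render one "Netfs  : ..." line the way netfs_stats_show() does. */
static int stats_show(char *buf, size_t n)
{
	return snprintf(buf, n, "Netfs  : UL=%u us=%u\n",
			atomic_load(&n_upload),
			atomic_load(&n_upload_done));
}
```

Because each counter is read independently, the snapshot is not a consistent cut across counters; that is acceptable for debugging statistics and is the same trade-off the kernel code makes.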
+2 -2
fs/nfs/Kconfig
···
 
 config NFS_FSCACHE
 	bool "Provide NFS client caching support"
-	depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
-	select NETFS_SUPPORT
+	depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y
+	select FSCACHE
 	help
 	  Say Y here if you want NFS data to be cached locally on disc through
 	  the general filesystem cache manager
-7
fs/nfs/fscache.c
···
 	put_nfs_open_context(rreq->netfs_priv);
 }
 
-static inline int nfs_netfs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-	return fscache_begin_read_operation(&rreq->cache_resources,
-					    netfs_i_cookie(netfs_inode(rreq->inode)));
-}
-
 static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
 {
 	struct nfs_netfs_io_data *netfs;
···
 const struct netfs_request_ops nfs_netfs_ops = {
 	.init_request		= nfs_netfs_init_request,
 	.free_request		= nfs_netfs_free_request,
-	.begin_cache_operation	= nfs_netfs_begin_cache_operation,
 	.issue_read		= nfs_netfs_issue_read,
 	.clamp_length		= nfs_netfs_clamp_length
 };
+1 -1
fs/nfs/fscache.h
···
 }
 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
 {
-	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops);
+	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
 }
 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
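A note on the API change above: `netfs_inode_init()` now takes a third argument, `use_zero_point`, which feeds the optimisation mentioned in the merge description of skipping reads that the server could only satisfy with zeros or EOF. The sketch below is a standalone userspace model of that decision, not kernel code; the `*_model` names and the `classify_read()` helper are illustrative only.

```c
#include <assert.h>

/* Userspace model of the two netfs_inode fields involved. */
struct netfs_inode_model {
	long long remote_i_size;	/* size of the file on the server */
	long long zero_point;		/* offset at/after which the server holds no data */
};

enum read_source {
	MODEL_DOWNLOAD_FROM_SERVER,
	MODEL_FILL_WITH_ZEROES,
	MODEL_HIT_EOF,
};

/* Classify a read at @pos: past the remote size it is EOF; at or past
 * zero_point the server could only return zeros, so no round trip is
 * needed; otherwise the data must be downloaded. */
static enum read_source classify_read(const struct netfs_inode_model *ctx,
				      long long pos)
{
	if (pos >= ctx->remote_i_size)
		return MODEL_HIT_EOF;
	if (pos >= ctx->zero_point)
		return MODEL_FILL_WITH_ZEROES;
	return MODEL_DOWNLOAD_FROM_SERVER;
}
```

In the diff, a filesystem that passes `use_zero_point = false` gets `zero_point = LLONG_MAX`, which in this model makes the fill-with-zeroes branch unreachable.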
+4 -5
fs/smb/client/cifsfs.c
···
 cifs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages_final(&inode->i_data);
-	if (inode->i_state & I_PINNING_FSCACHE_WB)
+	if (inode->i_state & I_PINNING_NETFS_WB)
 		cifs_fscache_unuse_inode_cookie(inode, true);
 	cifs_fscache_release_inode_cookie(inode);
 	clear_inode(inode);
···
 
 static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));
-	return 0;
+	return netfs_unpin_writeback(inode, wbc);
 }
 
 static int cifs_drop_inode(struct inode *inode)
···
 	if (rc < 0)
 		goto set_failed;
 
-	netfs_resize_file(&src_cifsi->netfs, src_end);
+	netfs_resize_file(&src_cifsi->netfs, src_end, true);
 	fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end);
 	return 0;
 
···
 			smb_file_src, smb_file_target, off, len, destoff);
 	if (rc == 0 && new_size > i_size_read(target_inode)) {
 		truncate_setsize(target_inode, new_size);
-		netfs_resize_file(&target_cifsi->netfs, new_size);
+		netfs_resize_file(&target_cifsi->netfs, new_size, true);
 		fscache_resize_cookie(cifs_inode_cookie(target_inode),
 				      new_size);
 	}
+2 -16
fs/smb/client/file.c
···
 	/* do we need to unpin (or unlock) the file */
 }
 
-/*
- * Mark a page as having been made dirty and thus needing writeback. We also
- * need to pin the cache object to write back to.
- */
-#ifdef CONFIG_CIFS_FSCACHE
-static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
-	return fscache_dirty_folio(mapping, folio,
-				   cifs_inode_cookie(mapping->host));
-}
-#else
-#define cifs_dirty_folio filemap_dirty_folio
-#endif
-
 const struct address_space_operations cifs_addr_ops = {
 	.read_folio = cifs_read_folio,
 	.readahead = cifs_readahead,
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
 	.write_end = cifs_write_end,
-	.dirty_folio = cifs_dirty_folio,
+	.dirty_folio = netfs_dirty_folio,
 	.release_folio = cifs_release_folio,
 	.direct_IO = cifs_direct_io,
 	.invalidate_folio = cifs_invalidate_folio,
···
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
 	.write_end = cifs_write_end,
-	.dirty_folio = cifs_dirty_folio,
+	.dirty_folio = netfs_dirty_folio,
 	.release_folio = cifs_release_folio,
 	.invalidate_folio = cifs_invalidate_folio,
 	.launder_folio = cifs_launder_folio,
+1 -1
fs/smb/client/fscache.c
···
 	if (ret < 0)
 		return ret;
 
-	ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
+	ret = cres.ops->prepare_write(&cres, &start, &len, len, i_size_read(inode),
 				      no_space_allocated_yet);
 	if (ret == 0)
 		ret = fscache_write(&cres, start, &iter, NULL, NULL);
+1 -1
include/linux/fs.h
···
 #define I_CREATING		(1 << 15)
 #define I_DONTCACHE		(1 << 16)
 #define I_SYNC_QUEUED		(1 << 17)
-#define I_PINNING_FSCACHE_WB	(1 << 18)
+#define I_PINNING_NETFS_WB	(1 << 18)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
+3
include/linux/fscache-cache.h
···
 extern atomic_t fscache_n_no_write_space;
 extern atomic_t fscache_n_no_create_space;
 extern atomic_t fscache_n_culled;
+extern atomic_t fscache_n_dio_misfit;
 #define fscache_count_read() atomic_inc(&fscache_n_read)
 #define fscache_count_write() atomic_inc(&fscache_n_write)
 #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space)
 #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space)
 #define fscache_count_culled() atomic_inc(&fscache_n_culled)
+#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit)
 #else
 #define fscache_count_read() do {} while(0)
 #define fscache_count_write() do {} while(0)
 #define fscache_count_no_write_space() do {} while(0)
 #define fscache_count_no_create_space() do {} while(0)
 #define fscache_count_culled() do {} while(0)
+#define fscache_count_dio_misfit() do {} while(0)
 #endif
 
 #endif /* _LINUX_FSCACHE_CACHE_H */
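The new `fscache_n_dio_misfit` counter follows the file's existing pattern: when stats are configured in, the macro increments an atomic; otherwise it compiles to a `do {} while(0)` no-op, so call sites need no `#ifdef`s of their own. Below is a standalone userspace sketch of that pattern, with a plain `int` standing in for `atomic_t` and an invented `MODEL_STATS` switch standing in for the config option.

```c
#include <assert.h>

#define MODEL_STATS 1	/* stands in for the CONFIG_* stats switch */

#if MODEL_STATS
static int n_dio_misfit;	/* stands in for atomic_t fscache_n_dio_misfit */
#define count_dio_misfit() ((void)++n_dio_misfit)
#else
/* Parses as a single statement but generates no code. */
#define count_dio_misfit() do {} while (0)
#endif

/* Callers count unconditionally; the macro decides whether it costs
 * anything.  Returns the running total (always 0 when stats are off). */
static int record_misfits(int events)
{
	for (int i = 0; i < events; i++)
		count_dio_misfit();
#if MODEL_STATS
	return n_dio_misfit;
#else
	return 0;
#endif
}
```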
-45
include/linux/fscache.h
···
  * indicates the cache resources to which the operation state should be
  * attached; @cookie indicates the cache object that will be accessed.
  *
- * This is intended to be called from the ->begin_cache_operation() netfs lib
- * operation as implemented by the network filesystem.
- *
  * @cres->inval_counter is set from @cookie->inval_counter for comparison at
  * the end of the operation. This allows invalidation during the operation to
  * be detected by the caller.
···
 	else if (term_func)
 		term_func(term_func_priv, -ENOBUFS, false);
 
-}
-
-#if __fscache_available
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
-			 struct fscache_cookie *cookie);
-#else
-#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \
-	filemap_dirty_folio(MAPPING, FOLIO)
-#endif
-
-/**
- * fscache_unpin_writeback - Unpin writeback resources
- * @wbc: The writeback control
- * @cookie: The cookie referring to the cache object
- *
- * Unpin the writeback resources pinned by fscache_dirty_folio(). This is
- * intended to be called by the netfs's ->write_inode() method.
- */
-static inline void fscache_unpin_writeback(struct writeback_control *wbc,
-					   struct fscache_cookie *cookie)
-{
-	if (wbc->unpinned_fscache_wb)
-		fscache_unuse_cookie(cookie, NULL, NULL);
-}
-
-/**
- * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode
- * @cookie: The cookie referring to the cache object
- * @inode: The inode to clean up
- * @aux: Auxiliary data to apply to the inode
- *
- * Clear any writeback resources held by an inode when the inode is evicted.
- * This must be called before clear_inode() is called.
- */
-static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
-						 struct inode *inode,
-						 const void *aux)
-{
-	if (inode->i_state & I_PINNING_FSCACHE_WB) {
-		loff_t i_size = i_size_read(inode);
-		fscache_unuse_cookie(cookie, aux, &i_size);
-	}
 }
 
 /**
+166 -13
include/linux/netfs.h
···
 	return folio_wait_private_2_killable(page_folio(page));
 }
 
+/* Marks used on xarray-based buffers */
+#define NETFS_BUF_PUT_MARK	XA_MARK_0	/* - Page needs putting */
+#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1	/* - Page needs wb/dirty flag wrangling */
+
 enum netfs_io_source {
 	NETFS_FILL_WITH_ZEROES,
 	NETFS_DOWNLOAD_FROM_SERVER,
 	NETFS_READ_FROM_CACHE,
 	NETFS_INVALID_READ,
+	NETFS_UPLOAD_TO_SERVER,
+	NETFS_WRITE_TO_CACHE,
+	NETFS_INVALID_WRITE,
 } __mode(byte);
 
 typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
···
 	struct fscache_cookie	*cache;
 #endif
 	loff_t			remote_i_size;	/* Size of the remote file */
+	loff_t			zero_point;	/* Size after which we assume there's no data
+						 * on the server */
+	unsigned long		flags;
+#define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
+#define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
+#define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
+#define NETFS_ICTX_NO_WRITE_STREAMING 3		/* Don't engage in write-streaming */
 };
+
+/*
+ * A netfs group - for instance a ceph snap. This is marked on dirty pages and
+ * pages marked with a group must be flushed before they can be written under
+ * the domain of another group.
+ */
+struct netfs_group {
+	refcount_t		ref;
+	void (*free)(struct netfs_group *netfs_group);
+};
+
+/*
+ * Information about a dirty page (attached only if necessary).
+ * folio->private
+ */
+struct netfs_folio {
+	struct netfs_group	*netfs_group;	/* Filesystem's grouping marker (or NULL). */
+	unsigned int		dirty_offset;	/* Write-streaming dirty data offset */
+	unsigned int		dirty_len;	/* Write-streaming dirty data length */
+};
+#define NETFS_FOLIO_INFO	0x1UL	/* OR'd with folio->private. */
+
+static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
+{
+	void *priv = folio_get_private(folio);
+
+	if ((unsigned long)priv & NETFS_FOLIO_INFO)
+		return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
+	return NULL;
+}
+
+static inline struct netfs_group *netfs_folio_group(struct folio *folio)
+{
+	struct netfs_folio *finfo;
+	void *priv = folio_get_private(folio);
+
+	finfo = netfs_folio_info(folio);
+	if (finfo)
+		return finfo->netfs_group;
+	return priv;
+}
 
 /*
  * Resources required to do operations on a cache.
···
 };
 
 /*
- * Descriptor for a single component subrequest.
+ * Descriptor for a single component subrequest. Each operation represents an
+ * individual read/write from/to a server, a cache, a journal, etc..
+ *
+ * The buffer iterator is persistent for the life of the subrequest struct and
+ * the pages it points to can be relied on to exist for the duration.
  */
 struct netfs_io_subrequest {
 	struct netfs_io_request *rreq;		/* Supervising I/O request */
+	struct work_struct	work;
 	struct list_head	rreq_link;	/* Link in rreq->subrequests */
+	struct iov_iter		io_iter;	/* Iterator for this subrequest */
 	loff_t			start;		/* Where to start the I/O */
 	size_t			len;		/* Size of the I/O */
 	size_t			transferred;	/* Amount of data transferred */
 	refcount_t		ref;
 	short			error;		/* 0 or error that occurred */
 	unsigned short		debug_index;	/* Index in list (for debugging output) */
+	unsigned int		max_nr_segs;	/* 0 or max number of segments in an iterator */
 	enum netfs_io_source	source;		/* Where to read from/write to */
 	unsigned long		flags;
 #define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
···
 	NETFS_READAHEAD,		/* This read was triggered by readahead */
 	NETFS_READPAGE,			/* This read is a synchronous read */
 	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
+	NETFS_WRITEBACK,		/* This write was triggered by writepages */
+	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
+	NETFS_LAUNDER_WRITE,		/* This is triggered by ->launder_folio() */
+	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
+	NETFS_DIO_READ,			/* This is a direct I/O read */
+	NETFS_DIO_WRITE,		/* This is a direct I/O write */
+	nr__netfs_io_origin
 } __mode(byte);
 
 /*
···
  * operations to a variety of data stores and then stitch the result together.
  */
 struct netfs_io_request {
-	struct work_struct	work;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 	struct inode		*inode;		/* The file being accessed */
 	struct address_space	*mapping;	/* The mapping being accessed */
+	struct kiocb		*iocb;		/* AIO completion vector */
 	struct netfs_cache_resources cache_resources;
+	struct list_head	proc_link;	/* Link in netfs_iorequests */
 	struct list_head	subrequests;	/* Contributory I/O operations */
+	struct iov_iter		iter;		/* Unencrypted-side iterator */
+	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
 	void			*netfs_priv;	/* Private data for the netfs */
+	struct bio_vec		*direct_bv;	/* DIO buffer list (when handling iovec-iter) */
+	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
 	unsigned int		debug_id;
+	unsigned int		rsize;		/* Maximum read size (0 for none) */
+	unsigned int		wsize;		/* Maximum write size (0 for none) */
+	unsigned int		subreq_counter;	/* Next subreq->debug_index */
 	atomic_t		nr_outstanding;	/* Number of ops in progress */
 	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
 	size_t			submitted;	/* Amount submitted for I/O so far */
 	size_t			len;		/* Length of the request */
+	size_t			upper_len;	/* Length can be extended to here */
+	size_t			transferred;	/* Amount to be indicated as transferred */
 	short			error;		/* 0 or error that occurred */
 	enum netfs_io_origin	origin;		/* Origin of the request */
+	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
 	loff_t			i_size;		/* Size of the file */
 	loff_t			start;		/* Start position */
 	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
···
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
+#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
+#define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
+#define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
+#define NETFS_RREQ_BLOCKED		10	/* We blocked */
 	const struct netfs_request_ops *netfs_ops;
+	void (*cleanup)(struct netfs_io_request *req);
 };
 
 /*
  * Operations the network filesystem can/must provide to the helpers.
  */
 struct netfs_request_ops {
+	unsigned int		io_request_size;	/* Alloc size for netfs_io_request struct */
+	unsigned int		io_subrequest_size;	/* Alloc size for netfs_io_subrequest struct */
 	int (*init_request)(struct netfs_io_request *rreq, struct file *file);
 	void (*free_request)(struct netfs_io_request *rreq);
-	int (*begin_cache_operation)(struct netfs_io_request *rreq);
+	void (*free_subrequest)(struct netfs_io_subrequest *rreq);
 
+	/* Read request handling */
 	void (*expand_readahead)(struct netfs_io_request *rreq);
 	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
 	void (*issue_read)(struct netfs_io_subrequest *subreq);
···
 	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
 				 struct folio **foliop, void **_fsdata);
 	void (*done)(struct netfs_io_request *rreq);
+
+	/* Modification handling */
+	void (*update_i_size)(struct inode *inode, loff_t i_size);
+
+	/* Write request handling */
+	void (*create_write_requests)(struct netfs_io_request *wreq,
+				      loff_t start, size_t len);
+	void (*invalidate_cache)(struct netfs_io_request *wreq);
 };
 
 /*
···
 };
 
 /*
- * Table of operations for access to a cache. This is obtained by
- * rreq->ops->begin_cache_operation().
+ * Table of operations for access to a cache.
  */
 struct netfs_cache_ops {
 	/* End an operation */
···
 	 * actually do.
 	 */
 	int (*prepare_write)(struct netfs_cache_resources *cres,
-			     loff_t *_start, size_t *_len, loff_t i_size,
-			     bool no_space_allocated_yet);
+			     loff_t *_start, size_t *_len, size_t upper_len,
+			     loff_t i_size, bool no_space_allocated_yet);
 
 	/* Prepare an on-demand read operation, shortening it to a cached/uncached
 	 * boundary as appropriate.
···
 			      loff_t *_data_start, size_t *_data_len);
 };
 
+/* High-level read API. */
+ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+
+/* High-level write API */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+			    struct netfs_group *netfs_group);
+ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+					 struct netfs_group *netfs_group);
+ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+/* Address operations API */
 struct readahead_control;
 void netfs_readahead(struct readahead_control *);
 int netfs_read_folio(struct file *, struct folio *);
 int netfs_write_begin(struct netfs_inode *, struct file *,
-		struct address_space *, loff_t pos, unsigned int len,
-		struct folio **, void **fsdata);
+		      struct address_space *, loff_t pos, unsigned int len,
+		      struct folio **, void **fsdata);
+int netfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc);
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
+bool netfs_release_folio(struct folio *folio, gfp_t gfp);
+int netfs_launder_folio(struct folio *folio);
 
+/* VMA operations API. */
+vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
+
+/* (Sub)request management API. */
 void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
 void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
 			  enum netfs_sreq_ref_trace what);
 void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
 			  bool was_async, enum netfs_sreq_ref_trace what);
-void netfs_stats_show(struct seq_file *);
 ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
 				struct iov_iter *new,
 				iov_iter_extraction_t extraction_flags);
+size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
+			size_t max_size, size_t max_segs);
+struct netfs_io_subrequest *netfs_create_write_request(
+	struct netfs_io_request *wreq, enum netfs_io_source dest,
+	loff_t start, size_t len, work_func_t worker);
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+				       bool was_async);
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
+
+int netfs_start_io_read(struct inode *inode);
+void netfs_end_io_read(struct inode *inode);
+int netfs_start_io_write(struct inode *inode);
+void netfs_end_io_write(struct inode *inode);
+int netfs_start_io_direct(struct inode *inode);
+void netfs_end_io_direct(struct inode *inode);
 
 /**
  * netfs_inode - Get the netfs inode context from the inode
···
  * netfs_inode_init - Initialise a netfslib inode context
  * @ctx: The netfs inode to initialise
  * @ops: The netfs's operations list
+ * @use_zero_point: True to use the zero_point read optimisation
  *
  * Initialise the netfs library context struct. This is expected to follow on
  * directly from the VFS inode struct.
  */
 static inline void netfs_inode_init(struct netfs_inode *ctx,
-				    const struct netfs_request_ops *ops)
+				    const struct netfs_request_ops *ops,
+				    bool use_zero_point)
 {
 	ctx->ops = ops;
 	ctx->remote_i_size = i_size_read(&ctx->inode);
+	ctx->zero_point = LLONG_MAX;
+	ctx->flags = 0;
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
+	/* ->releasepage() drives zero_point */
+	if (use_zero_point) {
+		ctx->zero_point = ctx->remote_i_size;
+		mapping_set_release_always(ctx->inode.i_mapping);
+	}
 }
 
 /**
  * netfs_resize_file - Note that a file got resized
  * @ctx: The netfs inode being resized
  * @new_i_size: The new file size
+ * @changed_on_server: The change was applied to the server
  *
  * Inform the netfs lib that a file got resized so that it can adjust its state.
  */
-static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
+				     bool changed_on_server)
 {
-	ctx->remote_i_size = new_i_size;
+	if (changed_on_server)
+		ctx->remote_i_size = new_i_size;
+	if (new_i_size < ctx->zero_point)
+		ctx->zero_point = new_i_size;
 }
 
 /**
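A note on the `NETFS_FOLIO_INFO` constant added in this header: it tags `folio->private` by OR'ing bit 0 into a `struct netfs_folio` pointer. The bit is free because the struct is always at least word-aligned, and `netfs_folio_info()` masks it back off, which lets `netfs_folio_group()` distinguish an info struct from a bare group pointer stored in the same field. The following is a userspace model of the same tagging trick; the `*_model` types are simplifications, not the kernel structs.

```c
#include <assert.h>
#include <stddef.h>

#define FOLIO_INFO_TAG 0x1UL	/* models NETFS_FOLIO_INFO */

struct folio_model {
	void *private;	/* models folio->private */
};

struct netfs_folio_model {
	unsigned int dirty_offset;
	unsigned int dirty_len;
};

/* Attach @finfo to @folio with the low bit set as a type tag; the
 * struct's alignment guarantees the bit is otherwise always clear. */
static void attach_info(struct folio_model *folio, struct netfs_folio_model *finfo)
{
	folio->private = (void *)((unsigned long)finfo | FOLIO_INFO_TAG);
}

/* Recover the info pointer, or NULL if private holds something else
 * (e.g. a bare group pointer, which is aligned and thus untagged). */
static struct netfs_folio_model *get_info(const struct folio_model *folio)
{
	unsigned long priv = (unsigned long)folio->private;

	if (priv & FOLIO_INFO_TAG)
		return (struct netfs_folio_model *)(priv & ~FOLIO_INFO_TAG);
	return NULL;
}
```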
+1 -1
include/linux/writeback.h
···
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
-	unsigned unpinned_fscache_wb:1;	/* Cleared I_PINNING_FSCACHE_WB */
+	unsigned unpinned_netfs_wb:1;	/* Cleared I_PINNING_NETFS_WB */
 
 	/*
 	 * When writeback IOs are bounced through async layers, only the
-31
include/trace/events/afs.h
···
 		  __entry->vnode, __entry->off, __entry->i_size)
 	);
 
-TRACE_EVENT(afs_folio_dirty,
-	    TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
-
-	    TP_ARGS(vnode, where, folio),
-
-	    TP_STRUCT__entry(
-		    __field(struct afs_vnode *, vnode)
-		    __field(const char *, where)
-		    __field(pgoff_t, index)
-		    __field(unsigned long, from)
-		    __field(unsigned long, to)
-			     ),
-
-	    TP_fast_assign(
-		    unsigned long priv = (unsigned long)folio_get_private(folio);
-		    __entry->vnode = vnode;
-		    __entry->where = where;
-		    __entry->index = folio_index(folio);
-		    __entry->from = afs_folio_dirty_from(folio, priv);
-		    __entry->to = afs_folio_dirty_to(folio, priv);
-		    __entry->to |= (afs_is_folio_dirty_mmapped(priv) ?
-				    (1UL << (BITS_PER_LONG - 1)) : 0);
-			   ),
-
-	    TP_printk("vn=%p %lx %s %lx-%lx%s",
-		      __entry->vnode, __entry->index, __entry->where,
-		      __entry->from,
-		      __entry->to & ~(1UL << (BITS_PER_LONG - 1)),
-		      __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
-	    );
-
 TRACE_EVENT(afs_call_state,
 	    TP_PROTO(struct afs_call *call,
 		     enum afs_call_state from,
+148 -7
include/trace/events/netfs.h
···
  * Define enums for tracing information.
  */
 #define netfs_read_traces \
+	EM(netfs_read_trace_dio_read, "DIO-READ ") \
 	EM(netfs_read_trace_expanded, "EXPANDED ") \
 	EM(netfs_read_trace_readahead, "READAHEAD") \
 	EM(netfs_read_trace_readpage, "READPAGE ") \
+	EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \
 	E_(netfs_read_trace_write_begin, "WRITEBEGN")
+
+#define netfs_write_traces \
+	EM(netfs_write_trace_dio_write, "DIO-WRITE") \
+	EM(netfs_write_trace_launder, "LAUNDER ") \
+	EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \
+	EM(netfs_write_trace_writeback, "WRITEBACK") \
+	E_(netfs_write_trace_writethrough, "WRITETHRU")
 
 #define netfs_rreq_origins \
 	EM(NETFS_READAHEAD, "RA") \
 	EM(NETFS_READPAGE, "RP") \
-	E_(NETFS_READ_FOR_WRITE, "RW")
+	EM(NETFS_READ_FOR_WRITE, "RW") \
+	EM(NETFS_WRITEBACK, "WB") \
+	EM(NETFS_WRITETHROUGH, "WT") \
+	EM(NETFS_LAUNDER_WRITE, "LW") \
+	EM(NETFS_UNBUFFERED_WRITE, "UW") \
+	EM(NETFS_DIO_READ, "DR") \
+	E_(NETFS_DIO_WRITE, "DW")
 
 #define netfs_rreq_traces \
 	EM(netfs_rreq_trace_assess, "ASSESS ") \
 	EM(netfs_rreq_trace_copy, "COPY ") \
 	EM(netfs_rreq_trace_done, "DONE ") \
 	EM(netfs_rreq_trace_free, "FREE ") \
+	EM(netfs_rreq_trace_redirty, "REDIRTY") \
 	EM(netfs_rreq_trace_resubmit, "RESUBMT") \
 	EM(netfs_rreq_trace_unlock, "UNLOCK ") \
-	E_(netfs_rreq_trace_unmark, "UNMARK ")
+	EM(netfs_rreq_trace_unmark, "UNMARK ") \
+	EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
+	EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
+	E_(netfs_rreq_trace_write_done, "WR-DONE")
 
 #define netfs_sreq_sources \
 	EM(NETFS_FILL_WITH_ZEROES, "ZERO") \
 	EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \
 	EM(NETFS_READ_FROM_CACHE, "READ") \
-	E_(NETFS_INVALID_READ, "INVL") \
+	EM(NETFS_INVALID_READ, "INVL") \
+	EM(NETFS_UPLOAD_TO_SERVER, "UPLD") \
+	EM(NETFS_WRITE_TO_CACHE, "WRIT") \
+	E_(NETFS_INVALID_WRITE, "INVL")
 
 #define netfs_sreq_traces \
 	EM(netfs_sreq_trace_download_instead, "RDOWN") \
 	EM(netfs_sreq_trace_free, "FREE ") \
+	EM(netfs_sreq_trace_limited, "LIMIT") \
 	EM(netfs_sreq_trace_prepare, "PREP ") \
 	EM(netfs_sreq_trace_resubmit_short, "SHORT") \
 	EM(netfs_sreq_trace_submit, "SUBMT") \
···
 #define netfs_failures \
 	EM(netfs_fail_check_write_begin, "check-write-begin") \
 	EM(netfs_fail_copy_to_cache, "copy-to-cache") \
+	EM(netfs_fail_dio_read_short, "dio-read-short") \
+	EM(netfs_fail_dio_read_zero, "dio-read-zero") \
 	EM(netfs_fail_read, "read") \
 	EM(netfs_fail_short_read, "short-read") \
-	E_(netfs_fail_prepare_write, "prep-write")
+	EM(netfs_fail_prepare_write, "prep-write") \
+	E_(netfs_fail_write, "write")
 
 #define netfs_rreq_ref_traces \
-	EM(netfs_rreq_trace_get_hold, "GET HOLD ") \
+	EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \
 	EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \
 	EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \
 	EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \
 	EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \
-	EM(netfs_rreq_trace_put_hold, "PUT HOLD ") \
+	EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \
+	EM(netfs_rreq_trace_put_return, "PUT RETURN ") \
 	EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \
-	EM(netfs_rreq_trace_put_zero_len, "PUT ZEROLEN") \
+	EM(netfs_rreq_trace_put_work, "PUT WORK ") \
+	EM(netfs_rreq_trace_see_work, "SEE WORK ") \
 	E_(netfs_rreq_trace_new, "NEW ")
 
 #define netfs_sreq_ref_traces \
···
 	EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
 	EM(netfs_sreq_trace_new, "NEW ") \
 	EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \
+	EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \
 	EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \
 	EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \
 	EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \
+	EM(netfs_sreq_trace_put_wip, "PUT WIP ") \
+	EM(netfs_sreq_trace_put_work, "PUT WORK ") \
 	E_(netfs_sreq_trace_put_terminated, "PUT TERM ")
+
+#define netfs_folio_traces \
+	/* The first few correspond to enum netfs_how_to_modify */ \
+	EM(netfs_folio_is_uptodate, "mod-uptodate") \
+	EM(netfs_just_prefetch, "mod-prefetch") \
+	EM(netfs_whole_folio_modify, "mod-whole-f") \
+	EM(netfs_modify_and_clear, "mod-n-clear") \
+	EM(netfs_streaming_write, "mod-streamw") \
+	EM(netfs_streaming_write_cont, "mod-streamw+") \
+	EM(netfs_flush_content, "flush") \
+	EM(netfs_streaming_filled_page, "mod-streamw-f") \
+	EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \
+	/* The rest are for writeback */ \
+	EM(netfs_folio_trace_clear, "clear") \
+	EM(netfs_folio_trace_clear_s, "clear-s") \
+	EM(netfs_folio_trace_clear_g, "clear-g") \
+	EM(netfs_folio_trace_copy_to_cache, "copy") \
+	EM(netfs_folio_trace_end_copy, "end-copy") \
+	EM(netfs_folio_trace_filled_gaps, "filled-gaps") \
+	EM(netfs_folio_trace_kill, "kill") \
+	EM(netfs_folio_trace_launder, "launder") \
+	EM(netfs_folio_trace_mkwrite, "mkwrite") \
+	EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
+	EM(netfs_folio_trace_read_gaps, "read-gaps") \
+	EM(netfs_folio_trace_redirty, "redirty") \
+	EM(netfs_folio_trace_redirtied, "redirtied") \
+	EM(netfs_folio_trace_store, "store") \
+	EM(netfs_folio_trace_store_plus, "store+") \
+	EM(netfs_folio_trace_wthru, "wthru") \
+	E_(netfs_folio_trace_wthru_plus, "wthru+")
 
 #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
···
 #define E_(a, b) a
 
 enum netfs_read_trace { netfs_read_traces } __mode(byte);
+enum netfs_write_trace { netfs_write_traces } __mode(byte);
 enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte);
 enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte);
 enum netfs_failure { netfs_failures } __mode(byte);
 enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte);
 enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
+enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
 
 #endif
 
···
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
 netfs_read_traces;
+netfs_write_traces;
 netfs_rreq_origins;
 netfs_rreq_traces;
 netfs_sreq_sources;
···
 netfs_failures;
 netfs_rreq_ref_traces;
 netfs_sreq_ref_traces;
+netfs_folio_traces;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
···
 		  __entry->subreq,
 		  __print_symbolic(__entry->what, netfs_sreq_ref_traces),
 		  __entry->ref)
+	);
+
+TRACE_EVENT(netfs_folio,
+	    TP_PROTO(struct folio *folio, enum netfs_folio_trace why),
+
+	    TP_ARGS(folio, why),
+
+	    TP_STRUCT__entry(
+		    __field(ino_t, ino)
+		    __field(pgoff_t, index)
+		    __field(unsigned int, nr)
+		    __field(enum netfs_folio_trace, why)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->ino = folio->mapping->host->i_ino;
+		    __entry->why = why;
+		    __entry->index = folio_index(folio);
+		    __entry->nr = folio_nr_pages(folio);
+		    ),
+
+	    TP_printk("i=%05lx ix=%05lx-%05lx %s",
+		      __entry->ino, __entry->index, __entry->index + __entry->nr - 1,
+		      __print_symbolic(__entry->why, netfs_folio_traces))
+	    );
+
+TRACE_EVENT(netfs_write_iter,
+	    TP_PROTO(const struct kiocb *iocb, const struct iov_iter *from),
+
+	    TP_ARGS(iocb, from),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long long, start)
+		    __field(size_t, len)
+		    __field(unsigned int, flags)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->start = iocb->ki_pos;
+		    __entry->len = iov_iter_count(from);
+		    __entry->flags = iocb->ki_flags;
+		    ),
+
+	    TP_printk("WRITE-ITER s=%llx l=%zx f=%x",
+		      __entry->start, __entry->len, __entry->flags)
+	    );
+
+TRACE_EVENT(netfs_write,
+	    TP_PROTO(const struct netfs_io_request *wreq,
+		     enum netfs_write_trace what),
+
+	    TP_ARGS(wreq, what),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, wreq)
+		    __field(unsigned int, cookie)
+		    __field(enum netfs_write_trace, what)
+		    __field(unsigned long long, start)
+		    __field(size_t, len)
+		    ),
+
+	    TP_fast_assign(
+		    struct netfs_inode *__ctx = netfs_inode(wreq->inode);
+		    struct fscache_cookie *__cookie = netfs_i_cookie(__ctx);
+		    __entry->wreq = wreq->debug_id;
+		    __entry->cookie = __cookie ? __cookie->debug_id : 0;
+		    __entry->what = what;
+		    __entry->start = wreq->start;
+		    __entry->len = wreq->len;
+		    ),
+
+	    TP_printk("R=%08x %s c=%08x by=%llx-%llx",
+		      __entry->wreq,
+		      __print_symbolic(__entry->what, netfs_write_traces),
+		      __entry->cookie,
+		      __entry->start, __entry->start + __entry->len - 1)
 	);
 
 #undef EM
+2
mm/filemap.c
···
 
 	return filemap_write_and_wait_range(mapping, pos, end);
 }
+EXPORT_SYMBOL_GPL(kiocb_write_and_wait);
 
 int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
 {
···
 	return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
 					     end >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(kiocb_invalidate_pages);
 
 /**
  * generic_file_read_iter - generic filesystem read routine
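The two kiocb helpers exported above are what a direct-write path uses to keep the page cache coherent: flush any dirty overlapping pagecache first, then invalidate it, then issue the write that bypasses the cache. The toy userspace model below illustrates only that ordering, with a single fixed-size "page" and invented struct names; it is not how the kernel implements these helpers.

```c
#include <assert.h>
#include <string.h>

/* Toy single-page "page cache" and "disk". */
struct toy_cache {
	char data[16];
	int present;
	int dirty;
};

struct toy_disk {
	char data[16];
};

/* Models kiocb_write_and_wait(): flush the dirty cached copy to disk. */
static void write_and_wait(struct toy_cache *c, struct toy_disk *d)
{
	if (c->present && c->dirty) {
		memcpy(d->data, c->data, sizeof(d->data));
		c->dirty = 0;
	}
}

/* Models kiocb_invalidate_pages(): drop the now-clean cached copy so a
 * later buffered read cannot see data the direct write is about to
 * overwrite on disk. */
static void invalidate_pages(struct toy_cache *c)
{
	if (c->present && !c->dirty)
		c->present = 0;
}

/* A direct write bypasses the cache entirely and hits the disk. */
static void direct_write(struct toy_disk *d, const char *buf)
{
	memcpy(d->data, buf, strlen(buf) + 1);
}
```

Running flush, invalidate, then the direct write in that order leaves the disk holding the direct data with no stale cached copy shadowing it.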