Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nfs: Implement cache I/O by accessing the cache directly

Move NFS to using fscache DIO API instead of the old upstream I/O API as
that has been removed. This is a stopgap solution as the intention is that
at some time in the future, the cache will move to using larger blocks and
won't be able to store individual pages in order to deal with the potential
for data corruption due to the backing filesystem being able to insert/remove
bridging blocks of zeros into its extent list[1].

NFS then reads and writes cache pages synchronously and one page at a time.

The preferred change would be to use the netfs lib, but the new I/O API can
be used directly. It's just that as the cache now needs to track data for
itself, caching blocks may exceed page size...

This code is somewhat borrowed from my "fallback I/O" patchset[2].

Changes
=======
ver #3:
- Restore lost =n fallback for nfs_fscache_release_page()[2].

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna.schumaker@netapp.com>
cc: linux-nfs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu [1]
Link: https://lore.kernel.org/r/202112100957.2oEDT20W-lkp@intel.com/ [2]
Link: https://lore.kernel.org/r/163189108292.2509237.12615909591150927232.stgit@warthog.procyon.org.uk/ [2]
Link: https://lore.kernel.org/r/163906981318.143852.17220018647843475985.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967184451.1823006.6450645559828329590.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021577632.640689.11069627070150063812.stgit@warthog.procyon.org.uk/ # v4

+163 -83
+8
fs/fscache/io.c
··· 150 150 } 151 151 EXPORT_SYMBOL(__fscache_begin_read_operation); 152 152 153 + int __fscache_begin_write_operation(struct netfs_cache_resources *cres, 154 + struct fscache_cookie *cookie) 155 + { 156 + return fscache_begin_operation(cres, cookie, FSCACHE_WANT_PARAMS, 157 + fscache_access_io_write); 158 + } 159 + EXPORT_SYMBOL(__fscache_begin_write_operation); 160 + 153 161 /** 154 162 * fscache_set_page_dirty - Mark page dirty and pin a cache object for writeback 155 163 * @page: The page being dirtied
+103 -23
fs/nfs/fscache.c
··· 249 249 } 250 250 } 251 251 252 + static inline void fscache_end_operation(struct netfs_cache_resources *cres) 253 + { 254 + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); 255 + 256 + if (ops) 257 + ops->end_operation(cres); 258 + } 259 + 260 + /* 261 + * Fallback page reading interface. 262 + */ 263 + static int fscache_fallback_read_page(struct inode *inode, struct page *page) 264 + { 265 + struct netfs_cache_resources cres; 266 + struct fscache_cookie *cookie = nfs_i_fscache(inode); 267 + struct iov_iter iter; 268 + struct bio_vec bvec[1]; 269 + int ret; 270 + 271 + memset(&cres, 0, sizeof(cres)); 272 + bvec[0].bv_page = page; 273 + bvec[0].bv_offset = 0; 274 + bvec[0].bv_len = PAGE_SIZE; 275 + iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); 276 + 277 + ret = fscache_begin_read_operation(&cres, cookie); 278 + if (ret < 0) 279 + return ret; 280 + 281 + ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL, 282 + NULL, NULL); 283 + fscache_end_operation(&cres); 284 + return ret; 285 + } 286 + 287 + /* 288 + * Fallback page writing interface. 
289 + */ 290 + static int fscache_fallback_write_page(struct inode *inode, struct page *page, 291 + bool no_space_allocated_yet) 292 + { 293 + struct netfs_cache_resources cres; 294 + struct fscache_cookie *cookie = nfs_i_fscache(inode); 295 + struct iov_iter iter; 296 + struct bio_vec bvec[1]; 297 + loff_t start = page_offset(page); 298 + size_t len = PAGE_SIZE; 299 + int ret; 300 + 301 + memset(&cres, 0, sizeof(cres)); 302 + bvec[0].bv_page = page; 303 + bvec[0].bv_offset = 0; 304 + bvec[0].bv_len = PAGE_SIZE; 305 + iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); 306 + 307 + ret = fscache_begin_write_operation(&cres, cookie); 308 + if (ret < 0) 309 + return ret; 310 + 311 + ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode), 312 + no_space_allocated_yet); 313 + if (ret == 0) 314 + ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL); 315 + fscache_end_operation(&cres); 316 + return ret; 317 + } 318 + 252 319 /* 253 320 * Retrieve a page from fscache 254 321 */ 255 - int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, 256 - struct inode *inode, struct page *page) 322 + int __nfs_readpage_from_fscache(struct inode *inode, struct page *page) 257 323 { 324 + int ret; 325 + 258 326 dfprintk(FSCACHE, 259 327 "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", 260 328 nfs_i_fscache(inode), page, page->index, page->flags, inode); 261 329 262 330 if (PageChecked(page)) { 331 + dfprintk(FSCACHE, "NFS: readpage_from_fscache: PageChecked\n"); 263 332 ClearPageChecked(page); 264 333 return 1; 265 334 } 266 335 267 - return -ENOBUFS; // TODO: Use netfslib 336 + ret = fscache_fallback_read_page(inode, page); 337 + if (ret < 0) { 338 + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); 339 + dfprintk(FSCACHE, 340 + "NFS: readpage_from_fscache failed %d\n", ret); 341 + SetPageChecked(page); 342 + return ret; 343 + } 344 + 345 + /* Read completed synchronously */ 346 + dfprintk(FSCACHE, "NFS: 
readpage_from_fscache: read successful\n"); 347 + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); 348 + SetPageUptodate(page); 349 + return 0; 268 350 } 269 351 270 352 /* 271 - * Retrieve a set of pages from fscache 353 + * Store a newly fetched page in fscache. We can be certain there's no page 354 + * stored in the cache as yet otherwise we would've read it from there. 272 355 */ 273 - int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, 274 - struct inode *inode, 275 - struct address_space *mapping, 276 - struct list_head *pages, 277 - unsigned *nr_pages) 356 + void __nfs_readpage_to_fscache(struct inode *inode, struct page *page) 278 357 { 279 - dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", 280 - nfs_i_fscache(inode), *nr_pages, inode); 358 + int ret; 281 359 282 - return -ENOBUFS; // TODO: Use netfslib 283 - } 284 - 285 - /* 286 - * Store a newly fetched page in fscache 287 - * - PG_fscache must be set on the page 288 - */ 289 - void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) 290 - { 291 360 dfprintk(FSCACHE, 292 - "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", 293 - nfs_i_fscache(inode), page, page->index, page->flags, sync); 361 + "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx))\n", 362 + nfs_i_fscache(inode), page, page->index, page->flags); 294 363 295 - return; // TODO: Use netfslib 364 + ret = fscache_fallback_write_page(inode, page, true); 365 + 366 + dfprintk(FSCACHE, 367 + "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", 368 + page, page->index, page->flags, ret); 369 + 370 + if (ret != 0) { 371 + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); 372 + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); 373 + } else { 374 + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_OK); 375 + } 296 376 }
+11 -41
fs/nfs/fscache.h
··· 44 44 extern void nfs_fscache_open_file(struct inode *, struct file *); 45 45 extern void nfs_fscache_release_file(struct inode *, struct file *); 46 46 47 - extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); 48 - 49 - extern int __nfs_readpage_from_fscache(struct nfs_open_context *, 50 - struct inode *, struct page *); 51 - extern int __nfs_readpages_from_fscache(struct nfs_open_context *, 52 - struct inode *, struct address_space *, 53 - struct list_head *, unsigned *); 54 - extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int); 47 + extern int __nfs_readpage_from_fscache(struct inode *, struct page *); 48 + extern void __nfs_read_completion_to_fscache(struct nfs_pgio_header *hdr, 49 + unsigned long bytes); 50 + extern void __nfs_readpage_to_fscache(struct inode *, struct page *); 55 51 56 52 static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) 57 53 { ··· 65 69 /* 66 70 * Retrieve a page from an inode data storage object. 67 71 */ 68 - static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, 69 - struct inode *inode, 72 + static inline int nfs_readpage_from_fscache(struct inode *inode, 70 73 struct page *page) 71 74 { 72 75 if (NFS_I(inode)->fscache) 73 - return __nfs_readpage_from_fscache(ctx, inode, page); 74 - return -ENOBUFS; 75 - } 76 - 77 - /* 78 - * Retrieve a set of pages from an inode data storage object. 79 - */ 80 - static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, 81 - struct inode *inode, 82 - struct address_space *mapping, 83 - struct list_head *pages, 84 - unsigned *nr_pages) 85 - { 86 - if (NFS_I(inode)->fscache) 87 - return __nfs_readpages_from_fscache(ctx, inode, mapping, pages, 88 - nr_pages); 76 + return __nfs_readpage_from_fscache(inode, page); 89 77 return -ENOBUFS; 90 78 } 91 79 ··· 78 98 * in the cache. 
79 99 */ 80 100 static inline void nfs_readpage_to_fscache(struct inode *inode, 81 - struct page *page, 82 - int sync) 101 + struct page *page) 83 102 { 84 - if (PageFsCache(page)) 85 - __nfs_readpage_to_fscache(inode, page, sync); 103 + if (NFS_I(inode)->fscache) 104 + __nfs_readpage_to_fscache(inode, page); 86 105 } 87 106 88 107 static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata, ··· 135 156 { 136 157 return 1; /* True: may release page */ 137 158 } 138 - static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, 139 - struct inode *inode, 159 + static inline int nfs_readpage_from_fscache(struct inode *inode, 140 160 struct page *page) 141 161 { 142 162 return -ENOBUFS; 143 163 } 144 - static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, 145 - struct inode *inode, 146 - struct address_space *mapping, 147 - struct list_head *pages, 148 - unsigned *nr_pages) 149 - { 150 - return -ENOBUFS; 151 - } 152 164 static inline void nfs_readpage_to_fscache(struct inode *inode, 153 - struct page *page, int sync) {} 165 + struct page *page) {} 154 166 155 167 156 168 static inline void nfs_fscache_invalidate(struct inode *inode, int flags) {}
+8 -17
fs/nfs/read.c
··· 123 123 struct address_space *mapping = page_file_mapping(page); 124 124 125 125 if (PageUptodate(page)) 126 - nfs_readpage_to_fscache(inode, page, 0); 126 + nfs_readpage_to_fscache(inode, page); 127 127 else if (!PageError(page) && !PagePrivate(page)) 128 128 generic_error_remove_page(mapping, page); 129 129 unlock_page(page); ··· 305 305 306 306 aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE); 307 307 308 + if (!IS_SYNC(page->mapping->host)) { 309 + error = nfs_readpage_from_fscache(page->mapping->host, page); 310 + if (error == 0) 311 + goto out_unlock; 312 + } 313 + 308 314 new = nfs_create_request(desc->ctx, page, 0, aligned_len); 309 315 if (IS_ERR(new)) 310 316 goto out_error; ··· 326 320 return 0; 327 321 out_error: 328 322 error = PTR_ERR(new); 323 + out_unlock: 329 324 unlock_page(page); 330 325 out: 331 326 return error; ··· 373 366 desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); 374 367 375 368 xchg(&desc.ctx->error, 0); 376 - if (!IS_SYNC(inode)) { 377 - ret = nfs_readpage_from_fscache(desc.ctx, inode, page); 378 - if (ret == 0) 379 - goto out_wait; 380 - } 381 - 382 369 nfs_pageio_init_read(&desc.pgio, inode, false, 383 370 &nfs_async_read_completion_ops); 384 371 ··· 382 381 383 382 nfs_pageio_complete_read(&desc.pgio); 384 383 ret = desc.pgio.pg_error < 0 ? 
desc.pgio.pg_error : 0; 385 - out_wait: 386 384 if (!ret) { 387 385 ret = wait_on_page_locked_killable(page); 388 386 if (!PageUptodate(page) && !ret) ··· 419 419 } else 420 420 desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); 421 421 422 - /* attempt to read as many of the pages as possible from the cache 423 - * - this returns -ENOBUFS immediately if the cookie is negative 424 - */ 425 - ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, 426 - pages, &nr_pages); 427 - if (ret == 0) 428 - goto read_complete; /* all pages were read */ 429 - 430 422 nfs_pageio_init_read(&desc.pgio, inode, false, 431 423 &nfs_async_read_completion_ops); 432 424 ··· 426 434 427 435 nfs_pageio_complete_read(&desc.pgio); 428 436 429 - read_complete: 430 437 put_nfs_open_context(desc.ctx); 431 438 out: 432 439 trace_nfs_aop_readahead_done(inode, nr_pages, ret);
+5 -2
fs/nfs/write.c
··· 2126 2126 if (PagePrivate(page)) 2127 2127 return -EBUSY; 2128 2128 2129 - if (!nfs_fscache_release_page(page, GFP_KERNEL)) 2130 - return -EBUSY; 2129 + if (PageFsCache(page)) { 2130 + if (mode == MIGRATE_ASYNC) 2131 + return -EBUSY; 2132 + wait_on_page_fscache(page); 2133 + } 2131 2134 2132 2135 return migrate_page(mapping, newpage, page, mode); 2133 2136 }
+28
include/linux/fscache.h
··· 168 168 extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t); 169 169 extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); 170 170 extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); 171 + extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); 171 172 172 173 extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, 173 174 loff_t, size_t, loff_t, netfs_io_terminated_t, void *, ··· 498 497 const struct netfs_cache_ops *ops = fscache_operation_valid(cres); 499 498 return ops->read(cres, start_pos, iter, read_hole, 500 499 term_func, term_func_priv); 500 + } 501 + 502 + /** 503 + * fscache_begin_write_operation - Begin a write operation for the netfs lib 504 + * @cres: The cache resources for the write being performed 505 + * @cookie: The cookie representing the cache object 506 + * 507 + * Begin a write operation on behalf of the netfs helper library. @cres 508 + * indicates the cache resources to which the operation state should be 509 + * attached; @cookie indicates the cache object that will be accessed. 510 + * 511 + * @cres->inval_counter is set from @cookie->inval_counter for comparison at 512 + * the end of the operation. This allows invalidation during the operation to 513 + * be detected by the caller. 514 + * 515 + * Returns: 516 + * * 0 - Success 517 + * * -ENOBUFS - No caching available 518 + * * Other error code from the cache, such as -ENOMEM. 519 + */ 520 + static inline 521 + int fscache_begin_write_operation(struct netfs_cache_resources *cres, 522 + struct fscache_cookie *cookie) 523 + { 524 + if (fscache_cookie_enabled(cookie)) 525 + return __fscache_begin_write_operation(cres, cookie); 526 + return -ENOBUFS; 501 527 } 502 528 503 529 /**