at v6.11 21 kB view raw
1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* Network filesystem support services. 3 * 4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 * 7 * See: 8 * 9 * Documentation/filesystems/netfs_library.rst 10 * 11 * for a description of the network filesystem interface declared here. 12 */ 13 14#ifndef _LINUX_NETFS_H 15#define _LINUX_NETFS_H 16 17#include <linux/workqueue.h> 18#include <linux/fs.h> 19#include <linux/pagemap.h> 20#include <linux/uio.h> 21 22enum netfs_sreq_ref_trace; 23typedef struct mempool_s mempool_t; 24 25/** 26 * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] 27 * @folio: The folio. 28 * 29 * Call this function before writing a folio to a local cache. Starting a 30 * second write before the first one finishes is not allowed. 31 * 32 * Note that this should no longer be used. 33 */ 34static inline void folio_start_private_2(struct folio *folio) 35{ 36 VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); 37 folio_get(folio); 38 folio_set_private_2(folio); 39} 40 41/* Marks used on xarray-based buffers */ 42#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ 43#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ 44 45enum netfs_io_source { 46 NETFS_FILL_WITH_ZEROES, 47 NETFS_DOWNLOAD_FROM_SERVER, 48 NETFS_READ_FROM_CACHE, 49 NETFS_INVALID_READ, 50 NETFS_UPLOAD_TO_SERVER, 51 NETFS_WRITE_TO_CACHE, 52 NETFS_INVALID_WRITE, 53} __mode(byte); 54 55typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, 56 bool was_async); 57 58/* 59 * Per-inode context. This wraps the VFS inode. 60 */ 61struct netfs_inode { 62 struct inode inode; /* The VFS inode */ 63 const struct netfs_request_ops *ops; 64#if IS_ENABLED(CONFIG_FSCACHE) 65 struct fscache_cookie *cache; 66#endif 67 struct mutex wb_lock; /* Writeback serialisation */ 68 loff_t remote_i_size; /* Size of the remote file */ 69 loff_t zero_point; /* Size after which we assume there's no data 70 * on the server */ 71 atomic_t io_count; /* Number of outstanding reqs */ 72 unsigned long flags; 73#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ 74#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ 75#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ 76}; 77 78/* 79 * A netfs group - for instance a ceph snap. This is marked on dirty pages and 80 * pages marked with a group must be flushed before they can be written under 81 * the domain of another group. 82 */ 83struct netfs_group { 84 refcount_t ref; 85 void (*free)(struct netfs_group *netfs_group); 86}; 87 88/* 89 * Information about a dirty page (attached only if necessary). 90 * folio->private 91 */ 92struct netfs_folio { 93 struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */ 94 unsigned int dirty_offset; /* Write-streaming dirty data offset */ 95 unsigned int dirty_len; /* Write-streaming dirty data length */ 96}; 97#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ 98#define NETFS_FOLIO_COPY_TO_CACHE ((struct netfs_group *)0x356UL) /* Write to the cache only */ 99 100static inline bool netfs_is_folio_info(const void *priv) 101{ 102 return (unsigned long)priv & NETFS_FOLIO_INFO; 103} 104 105static inline struct netfs_folio *__netfs_folio_info(const void *priv) 106{ 107 if (netfs_is_folio_info(priv)) 108 return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); 109 return NULL; 110} 111 112static inline struct netfs_folio *netfs_folio_info(struct folio *folio) 113{ 114 return __netfs_folio_info(folio_get_private(folio)); 115} 116 117static inline struct netfs_group *netfs_folio_group(struct folio *folio) 118{ 119 struct netfs_folio *finfo; 120 void *priv = folio_get_private(folio); 121 122 finfo = netfs_folio_info(folio); 123 if (finfo) 124 return finfo->netfs_group; 125 return priv; 126} 127 128/* 129 * Stream of I/O subrequests going to a particular destination, such as the 130 * server or the local cache. This is mainly intended for writing where we may 131 * have to write to multiple destinations concurrently. 132 */ 133struct netfs_io_stream { 134 /* Submission tracking */ 135 struct netfs_io_subrequest *construct; /* Op being constructed */ 136 unsigned int submit_off; /* Folio offset we're submitting from */ 137 unsigned int submit_len; /* Amount of data left to submit */ 138 unsigned int submit_max_len; /* Amount I/O can be rounded up to */ 139 void (*prepare_write)(struct netfs_io_subrequest *subreq); 140 void (*issue_write)(struct netfs_io_subrequest *subreq); 141 /* Collection tracking */ 142 struct list_head subrequests; /* Contributory I/O operations */ 143 struct netfs_io_subrequest *front; /* Op being collected */ 144 unsigned long long collected_to; /* Position we've collected results to */ 145 size_t transferred; /* The amount transferred from this stream */ 146 enum netfs_io_source source; /* Where to read from/write to */ 147 unsigned short error; /* Aggregate error for the stream */ 148 unsigned char stream_nr; /* Index of stream in parent table */ 149 bool avail; /* T if stream is available */ 150 bool active; /* T if stream is active */ 151 bool need_retry; /* T if this stream needs retrying */ 152 bool failed; /* T if this stream failed */ 153}; 154 155/* 156 * Resources required to do operations on a cache. 157 */ 158struct netfs_cache_resources { 159 const struct netfs_cache_ops *ops; 160 void *cache_priv; 161 void *cache_priv2; 162 unsigned int debug_id; /* Cookie debug ID */ 163 unsigned int inval_counter; /* object->inval_counter at begin_op */ 164}; 165 166/* 167 * Descriptor for a single component subrequest. Each operation represents an 168 * individual read/write from/to a server, a cache, a journal, etc.. 169 * 170 * The buffer iterator is persistent for the life of the subrequest struct and 171 * the pages it points to can be relied on to exist for the duration. 172 */ 173struct netfs_io_subrequest { 174 struct netfs_io_request *rreq; /* Supervising I/O request */ 175 struct work_struct work; 176 struct list_head rreq_link; /* Link in rreq->subrequests */ 177 struct iov_iter io_iter; /* Iterator for this subrequest */ 178 unsigned long long start; /* Where to start the I/O */ 179 size_t max_len; /* Maximum size of the I/O */ 180 size_t len; /* Size of the I/O */ 181 size_t transferred; /* Amount of data transferred */ 182 refcount_t ref; 183 short error; /* 0 or error that occurred */ 184 unsigned short debug_index; /* Index in list (for debugging output) */ 185 unsigned int nr_segs; /* Number of segs in io_iter */ 186 unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ 187 enum netfs_io_source source; /* Where to read from/write to */ 188 unsigned char stream_nr; /* I/O stream this belongs to */ 189 unsigned long flags; 190#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ 191#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ 192#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ 193#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ 194#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ 195#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ 196#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ 197#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ 198#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ 199#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ 200#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ 201#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ 202}; 203 204enum netfs_io_origin { 205 NETFS_READAHEAD, /* This read was triggered by readahead */ 206 NETFS_READPAGE, /* This read is a synchronous read */ 207 NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ 208 NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ 209 NETFS_WRITEBACK, /* This write was triggered by writepages */ 210 NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ 211 NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ 212 NETFS_DIO_READ, /* This is a direct I/O read */ 213 NETFS_DIO_WRITE, /* This is a direct I/O write */ 214 nr__netfs_io_origin 215} __mode(byte); 216 217/* 218 * Descriptor for an I/O helper request. This is used to make multiple I/O 219 * operations to a variety of data stores and then stitch the result together. 220 */ 221struct netfs_io_request { 222 union { 223 struct work_struct work; 224 struct rcu_head rcu; 225 }; 226 struct inode *inode; /* The file being accessed */ 227 struct address_space *mapping; /* The mapping being accessed */ 228 struct kiocb *iocb; /* AIO completion vector */ 229 struct netfs_cache_resources cache_resources; 230 struct list_head proc_link; /* Link in netfs_iorequests */ 231 struct list_head subrequests; /* Contributory I/O operations */ 232 struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ 233#define NR_IO_STREAMS 2 //wreq->nr_io_streams 234 struct netfs_group *group; /* Writeback group being written back */ 235 struct iov_iter iter; /* Unencrypted-side iterator */ 236 struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ 237 void *netfs_priv; /* Private data for the netfs */ 238 void *netfs_priv2; /* Private data for the netfs */ 239 struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ 240 unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ 241 unsigned int debug_id; 242 unsigned int rsize; /* Maximum read size (0 for none) */ 243 unsigned int wsize; /* Maximum write size (0 for none) */ 244 atomic_t subreq_counter; /* Next subreq->debug_index */ 245 unsigned int nr_group_rel; /* Number of refs to release on ->group */ 246 spinlock_t lock; /* Lock for queuing subreqs */ 247 atomic_t nr_outstanding; /* Number of ops in progress */ 248 atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ 249 size_t upper_len; /* Length can be extended to here */ 250 unsigned long long submitted; /* Amount submitted for I/O so far */ 251 unsigned long long len; /* Length of the request */ 252 size_t transferred; /* Amount to be indicated as transferred */ 253 short error; /* 0 or error that occurred */ 254 enum netfs_io_origin origin; /* Origin of the request */ 255 bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ 256 unsigned long long i_size; /* Size of the file */ 257 unsigned long long start; /* Start position */ 258 atomic64_t issued_to; /* Write issuer folio cursor */ 259 unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ 260 unsigned long long collected_to; /* Point we've collected to */ 261 unsigned long long cleaned_to; /* Position we've cleaned folios to */ 262 pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ 263 refcount_t ref; 264 unsigned long flags; 265#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ 266#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */ 267#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ 268#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ 269#define NETFS_RREQ_FAILED 4 /* The request failed */ 270#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ 271#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ 272#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ 273#define NETFS_RREQ_BLOCKED 10 /* We blocked */ 274#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ 275#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ 276#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ 277#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark 278 * write to cache on read */ 279 const struct netfs_request_ops *netfs_ops; 280 void (*cleanup)(struct netfs_io_request *req); 281}; 282 283/* 284 * Operations the network filesystem can/must provide to the helpers. 285 */ 286struct netfs_request_ops { 287 mempool_t *request_pool; 288 mempool_t *subrequest_pool; 289 int (*init_request)(struct netfs_io_request *rreq, struct file *file); 290 void (*free_request)(struct netfs_io_request *rreq); 291 void (*free_subrequest)(struct netfs_io_subrequest *rreq); 292 293 /* Read request handling */ 294 void (*expand_readahead)(struct netfs_io_request *rreq); 295 bool (*clamp_length)(struct netfs_io_subrequest *subreq); 296 void (*issue_read)(struct netfs_io_subrequest *subreq); 297 bool (*is_still_valid)(struct netfs_io_request *rreq); 298 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, 299 struct folio **foliop, void **_fsdata); 300 void (*done)(struct netfs_io_request *rreq); 301 302 /* Modification handling */ 303 void (*update_i_size)(struct inode *inode, loff_t i_size); 304 void (*post_modify)(struct inode *inode); 305 306 /* Write request handling */ 307 void (*begin_writeback)(struct netfs_io_request *wreq); 308 void (*prepare_write)(struct netfs_io_subrequest *subreq); 309 void (*issue_write)(struct netfs_io_subrequest *subreq); 310 void (*retry_request)(struct netfs_io_request *wreq, struct netfs_io_stream *stream); 311 void (*invalidate_cache)(struct netfs_io_request *wreq); 312}; 313 314/* 315 * How to handle reading from a hole. 316 */ 317enum netfs_read_from_hole { 318 NETFS_READ_HOLE_IGNORE, 319 NETFS_READ_HOLE_CLEAR, 320 NETFS_READ_HOLE_FAIL, 321}; 322 323/* 324 * Table of operations for access to a cache. 325 */ 326struct netfs_cache_ops { 327 /* End an operation */ 328 void (*end_operation)(struct netfs_cache_resources *cres); 329 330 /* Read data from the cache */ 331 int (*read)(struct netfs_cache_resources *cres, 332 loff_t start_pos, 333 struct iov_iter *iter, 334 enum netfs_read_from_hole read_hole, 335 netfs_io_terminated_t term_func, 336 void *term_func_priv); 337 338 /* Write data to the cache */ 339 int (*write)(struct netfs_cache_resources *cres, 340 loff_t start_pos, 341 struct iov_iter *iter, 342 netfs_io_terminated_t term_func, 343 void *term_func_priv); 344 345 /* Write data to the cache from a netfs subrequest. */ 346 void (*issue_write)(struct netfs_io_subrequest *subreq); 347 348 /* Expand readahead request */ 349 void (*expand_readahead)(struct netfs_cache_resources *cres, 350 unsigned long long *_start, 351 unsigned long long *_len, 352 unsigned long long i_size); 353 354 /* Prepare a read operation, shortening it to a cached/uncached 355 * boundary as appropriate. 356 */ 357 enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq, 358 unsigned long long i_size); 359 360 /* Prepare a write subrequest, working out if we're allowed to do it 361 * and finding out the maximum amount of data to gather before 362 * attempting to submit. If we're not permitted to do it, the 363 * subrequest should be marked failed. 364 */ 365 void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq); 366 367 /* Prepare a write operation, working out what part of the write we can 368 * actually do. 369 */ 370 int (*prepare_write)(struct netfs_cache_resources *cres, 371 loff_t *_start, size_t *_len, size_t upper_len, 372 loff_t i_size, bool no_space_allocated_yet); 373 374 /* Prepare an on-demand read operation, shortening it to a cached/uncached 375 * boundary as appropriate. 376 */ 377 enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres, 378 loff_t start, size_t *_len, 379 loff_t i_size, 380 unsigned long *_flags, ino_t ino); 381 382 /* Query the occupancy of the cache in a region, returning where the 383 * next chunk of data starts and how long it is. 384 */ 385 int (*query_occupancy)(struct netfs_cache_resources *cres, 386 loff_t start, size_t len, size_t granularity, 387 loff_t *_data_start, size_t *_data_len); 388}; 389 390/* High-level read API. */ 391ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter); 392ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); 393ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); 394ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); 395 396/* High-level write API */ 397ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, 398 struct netfs_group *netfs_group); 399ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, 400 struct netfs_group *netfs_group); 401ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from); 402ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter, 403 struct netfs_group *netfs_group); 404ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); 405 406/* Address operations API */ 407struct readahead_control; 408void netfs_readahead(struct readahead_control *); 409int netfs_read_folio(struct file *, struct folio *); 410int netfs_write_begin(struct netfs_inode *, struct file *, 411 struct address_space *, loff_t pos, unsigned int len, 412 struct folio **, void **fsdata); 413int netfs_writepages(struct address_space *mapping, 414 struct writeback_control *wbc); 415bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio); 416int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc); 417void netfs_clear_inode_writeback(struct inode *inode, const void *aux); 418void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); 419bool netfs_release_folio(struct folio *folio, gfp_t gfp); 420 421/* VMA operations API. */ 422vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); 423 424/* (Sub)request management API. */ 425void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); 426void netfs_get_subrequest(struct netfs_io_subrequest *subreq, 427 enum netfs_sreq_ref_trace what); 428void netfs_put_subrequest(struct netfs_io_subrequest *subreq, 429 bool was_async, enum netfs_sreq_ref_trace what); 430ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, 431 struct iov_iter *new, 432 iov_iter_extraction_t extraction_flags); 433size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, 434 size_t max_size, size_t max_segs); 435void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); 436void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 437 bool was_async); 438void netfs_queue_write_request(struct netfs_io_subrequest *subreq); 439 440int netfs_start_io_read(struct inode *inode); 441void netfs_end_io_read(struct inode *inode); 442int netfs_start_io_write(struct inode *inode); 443void netfs_end_io_write(struct inode *inode); 444int netfs_start_io_direct(struct inode *inode); 445void netfs_end_io_direct(struct inode *inode); 446 447/** 448 * netfs_inode - Get the netfs inode context from the inode 449 * @inode: The inode to query 450 * 451 * Get the netfs lib inode context from the network filesystem's inode. The 452 * context struct is expected to directly follow on from the VFS inode struct. 453 */ 454static inline struct netfs_inode *netfs_inode(struct inode *inode) 455{ 456 return container_of(inode, struct netfs_inode, inode); 457} 458 459/** 460 * netfs_inode_init - Initialise a netfslib inode context 461 * @ctx: The netfs inode to initialise 462 * @ops: The netfs's operations list 463 * @use_zero_point: True to use the zero_point read optimisation 464 * 465 * Initialise the netfs library context struct. This is expected to follow on 466 * directly from the VFS inode struct. 467 */ 468static inline void netfs_inode_init(struct netfs_inode *ctx, 469 const struct netfs_request_ops *ops, 470 bool use_zero_point) 471{ 472 ctx->ops = ops; 473 ctx->remote_i_size = i_size_read(&ctx->inode); 474 ctx->zero_point = LLONG_MAX; 475 ctx->flags = 0; 476 atomic_set(&ctx->io_count, 0); 477#if IS_ENABLED(CONFIG_FSCACHE) 478 ctx->cache = NULL; 479#endif 480 mutex_init(&ctx->wb_lock); 481 /* ->releasepage() drives zero_point */ 482 if (use_zero_point) { 483 ctx->zero_point = ctx->remote_i_size; 484 mapping_set_release_always(ctx->inode.i_mapping); 485 } 486} 487 488/** 489 * netfs_resize_file - Note that a file got resized 490 * @ctx: The netfs inode being resized 491 * @new_i_size: The new file size 492 * @changed_on_server: The change was applied to the server 493 * 494 * Inform the netfs lib that a file got resized so that it can adjust its state. 495 */ 496static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, 497 bool changed_on_server) 498{ 499 if (changed_on_server) 500 ctx->remote_i_size = new_i_size; 501 if (new_i_size < ctx->zero_point) 502 ctx->zero_point = new_i_size; 503} 504 505/** 506 * netfs_i_cookie - Get the cache cookie from the inode 507 * @ctx: The netfs inode to query 508 * 509 * Get the caching cookie (if enabled) from the network filesystem's inode. 510 */ 511static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) 512{ 513#if IS_ENABLED(CONFIG_FSCACHE) 514 return ctx->cache; 515#else 516 return NULL; 517#endif 518} 519 520/** 521 * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete 522 * @inode: The netfs inode to wait on 523 * 524 * Wait for outstanding I/O requests of any type to complete. This is intended 525 * to be called from inode eviction routines. This makes sure that any 526 * resources held by those requests are cleaned up before we let the inode get 527 * cleaned up. 528 */ 529static inline void netfs_wait_for_outstanding_io(struct inode *inode) 530{ 531 struct netfs_inode *ictx = netfs_inode(inode); 532 533 wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0); 534} 535 536#endif /* _LINUX_NETFS_H */