Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/* Network filesystem support services.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 *
7 * See:
8 *
9 * Documentation/filesystems/netfs_library.rst
10 *
11 * for a description of the network filesystem interface declared here.
12 */
13
14#ifndef _LINUX_NETFS_H
15#define _LINUX_NETFS_H
16
17#include <linux/workqueue.h>
18#include <linux/fs.h>
19#include <linux/pagemap.h>
20#include <linux/uio.h>
21
/* Forward declarations for types that are only named, not defined, here. */
enum netfs_sreq_ref_trace;		/* Used by the subrequest get/put prototypes */
typedef struct mempool_s mempool_t;	/* Used for the pools in netfs_request_ops */
24
/**
 * folio_start_private_2 - Mark a folio as being written to the cache. [DEPRECATED]
 * @folio: The folio to mark.
 *
 * Takes a reference on @folio and sets its private_2 flag.  The flag must not
 * already be set (this is asserted), i.e. a second write may not be started
 * on a folio before the first one has finished.
 *
 * This helper is deprecated and should not be used by new code.
 */
static inline void folio_start_private_2(struct folio *folio)
{
	/* Starting a second write before the first completes is a bug. */
	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);

	/* Pin the folio, then flag it. */
	folio_get(folio);
	folio_set_private_2(folio);
}
40
/* Marks applied to entries in xarray-based buffers. */
#define NETFS_BUF_PUT_MARK		XA_MARK_0	/* Page needs putting */
#define NETFS_BUF_PAGECACHE_MARK	XA_MARK_1	/* Page needs wb/dirty flag wrangling */
44
/*
 * Where the data of an I/O operation is read from or written to.  This is
 * stored in the "source" member of subrequests and streams and is packed into
 * a single byte.
 */
enum netfs_io_source {
	/* Read-side values */
	NETFS_FILL_WITH_ZEROES,
	NETFS_DOWNLOAD_FROM_SERVER,
	NETFS_READ_FROM_CACHE,
	NETFS_INVALID_READ,
	/* Write-side values */
	NETFS_UPLOAD_TO_SERVER,
	NETFS_WRITE_TO_CACHE,
	NETFS_INVALID_WRITE,
} __mode(byte);
54
/*
 * Type of the termination callback handed to the cache read and write
 * operations together with a private pointer.
 * NOTE(review): going by the parameter names, the callback receives that
 * private pointer, a byte count or error code, and whether the completion
 * was asynchronous - confirm against the callers.
 */
typedef void (*netfs_io_terminated_t)(void *priv,
				      ssize_t transferred_or_error,
				      bool was_async);
57
58/*
59 * Per-inode context. This wraps the VFS inode.
60 */
61struct netfs_inode {
62 struct inode inode; /* The VFS inode */
63 const struct netfs_request_ops *ops;
64#if IS_ENABLED(CONFIG_FSCACHE)
65 struct fscache_cookie *cache;
66#endif
67 struct mutex wb_lock; /* Writeback serialisation */
68 loff_t remote_i_size; /* Size of the remote file */
69 loff_t zero_point; /* Size after which we assume there's no data
70 * on the server */
71 atomic_t io_count; /* Number of outstanding reqs */
72 unsigned long flags;
73#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
74#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
75#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
76};
77
78/*
79 * A netfs group - for instance a ceph snap. This is marked on dirty pages and
80 * pages marked with a group must be flushed before they can be written under
81 * the domain of another group.
82 */
83struct netfs_group {
84 refcount_t ref;
85 void (*free)(struct netfs_group *netfs_group);
86};
87
/*
 * Extra information about a dirty page.  This is only attached when needed
 * and is then reached through folio->private.
 */
struct netfs_folio {
	/* Filesystem's grouping marker (or NULL) */
	struct netfs_group	*netfs_group;
	/* Write-streaming dirty data offset */
	unsigned int		dirty_offset;
	/* Write-streaming dirty data length */
	unsigned int		dirty_len;
};
/* Tag bit that is OR'd into folio->private. */
#define NETFS_FOLIO_INFO		0x1UL
/* Special group value: write to the cache only. */
#define NETFS_FOLIO_COPY_TO_CACHE	((struct netfs_group *)0x356UL)

/*
 * Report whether a folio->private value carries the NETFS_FOLIO_INFO tag bit.
 */
static inline bool netfs_is_folio_info(const void *priv)
{
	unsigned long bits = (unsigned long)priv;

	return (bits & NETFS_FOLIO_INFO) != 0;
}
104
105static inline struct netfs_folio *__netfs_folio_info(const void *priv)
106{
107 if (netfs_is_folio_info(priv))
108 return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
109 return NULL;
110}
111
/*
 * Get the netfs_folio record attached to a folio, or NULL if the folio's
 * private value is not tagged as one.
 */
static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
{
	void *priv = folio_get_private(folio);

	return __netfs_folio_info(priv);
}
116
117static inline struct netfs_group *netfs_folio_group(struct folio *folio)
118{
119 struct netfs_folio *finfo;
120 void *priv = folio_get_private(folio);
121
122 finfo = netfs_folio_info(folio);
123 if (finfo)
124 return finfo->netfs_group;
125 return priv;
126}
127
128/*
129 * Stream of I/O subrequests going to a particular destination, such as the
130 * server or the local cache. This is mainly intended for writing where we may
131 * have to write to multiple destinations concurrently.
132 */
133struct netfs_io_stream {
134 /* Submission tracking */
135 struct netfs_io_subrequest *construct; /* Op being constructed */
136 unsigned int submit_off; /* Folio offset we're submitting from */
137 unsigned int submit_len; /* Amount of data left to submit */
138 unsigned int submit_max_len; /* Amount I/O can be rounded up to */
139 void (*prepare_write)(struct netfs_io_subrequest *subreq);
140 void (*issue_write)(struct netfs_io_subrequest *subreq);
141 /* Collection tracking */
142 struct list_head subrequests; /* Contributory I/O operations */
143 struct netfs_io_subrequest *front; /* Op being collected */
144 unsigned long long collected_to; /* Position we've collected results to */
145 size_t transferred; /* The amount transferred from this stream */
146 enum netfs_io_source source; /* Where to read from/write to */
147 unsigned short error; /* Aggregate error for the stream */
148 unsigned char stream_nr; /* Index of stream in parent table */
149 bool avail; /* T if stream is available */
150 bool active; /* T if stream is active */
151 bool need_retry; /* T if this stream needs retrying */
152 bool failed; /* T if this stream failed */
153};
154
/*
 * Resources needed to carry out operations on a cache.
 */
struct netfs_cache_resources {
	/* Table of cache operations */
	const struct netfs_cache_ops	*ops;
	/* Two private pointers; the code in this header does not interpret them */
	void				*cache_priv;
	void				*cache_priv2;
	/* Cookie debug ID */
	unsigned int			debug_id;
	/* object->inval_counter at begin_op */
	unsigned int			inval_counter;
};
165
166/*
167 * Descriptor for a single component subrequest. Each operation represents an
168 * individual read/write from/to a server, a cache, a journal, etc..
169 *
170 * The buffer iterator is persistent for the life of the subrequest struct and
171 * the pages it points to can be relied on to exist for the duration.
172 */
173struct netfs_io_subrequest {
174 struct netfs_io_request *rreq; /* Supervising I/O request */
175 struct work_struct work;
176 struct list_head rreq_link; /* Link in rreq->subrequests */
177 struct iov_iter io_iter; /* Iterator for this subrequest */
178 unsigned long long start; /* Where to start the I/O */
179 size_t max_len; /* Maximum size of the I/O */
180 size_t len; /* Size of the I/O */
181 size_t transferred; /* Amount of data transferred */
182 refcount_t ref;
183 short error; /* 0 or error that occurred */
184 unsigned short debug_index; /* Index in list (for debugging output) */
185 unsigned int nr_segs; /* Number of segs in io_iter */
186 unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */
187 enum netfs_io_source source; /* Where to read from/write to */
188 unsigned char stream_nr; /* I/O stream this belongs to */
189 unsigned long flags;
190#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
191#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
192#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */
193#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
194#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
195#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
196#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
197#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
198#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
199#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
200#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
201#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */
202};
203
/*
 * What caused an I/O request to be made.  This is packed into a single byte;
 * the final enumerator is not an origin but the count of the ones before it.
 */
enum netfs_io_origin {
	NETFS_READAHEAD,		/* Read triggered by readahead */
	NETFS_READPAGE,			/* Synchronous read */
	NETFS_READ_FOR_WRITE,		/* Read done to prepare a write */
	NETFS_COPY_TO_CACHE,		/* Write that copies a read to the cache */
	NETFS_WRITEBACK,		/* Write triggered by writepages */
	NETFS_WRITETHROUGH,		/* Write made by netfs_perform_write() */
	NETFS_UNBUFFERED_WRITE,		/* Unbuffered write */
	NETFS_DIO_READ,			/* Direct I/O read */
	NETFS_DIO_WRITE,		/* Direct I/O write */
	nr__netfs_io_origin
} __mode(byte);
216
217/*
218 * Descriptor for an I/O helper request. This is used to make multiple I/O
219 * operations to a variety of data stores and then stitch the result together.
220 */
221struct netfs_io_request {
222 union {
223 struct work_struct work;
224 struct rcu_head rcu;
225 };
226 struct inode *inode; /* The file being accessed */
227 struct address_space *mapping; /* The mapping being accessed */
228 struct kiocb *iocb; /* AIO completion vector */
229 struct netfs_cache_resources cache_resources;
230 struct list_head proc_link; /* Link in netfs_iorequests */
231 struct list_head subrequests; /* Contributory I/O operations */
232 struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
233#define NR_IO_STREAMS 2 //wreq->nr_io_streams
234 struct netfs_group *group; /* Writeback group being written back */
235 struct iov_iter iter; /* Unencrypted-side iterator */
236 struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
237 void *netfs_priv; /* Private data for the netfs */
238 void *netfs_priv2; /* Private data for the netfs */
239 struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
240 unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
241 unsigned int debug_id;
242 unsigned int rsize; /* Maximum read size (0 for none) */
243 unsigned int wsize; /* Maximum write size (0 for none) */
244 atomic_t subreq_counter; /* Next subreq->debug_index */
245 unsigned int nr_group_rel; /* Number of refs to release on ->group */
246 spinlock_t lock; /* Lock for queuing subreqs */
247 atomic_t nr_outstanding; /* Number of ops in progress */
248 atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
249 size_t upper_len; /* Length can be extended to here */
250 unsigned long long submitted; /* Amount submitted for I/O so far */
251 unsigned long long len; /* Length of the request */
252 size_t transferred; /* Amount to be indicated as transferred */
253 short error; /* 0 or error that occurred */
254 enum netfs_io_origin origin; /* Origin of the request */
255 bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
256 unsigned long long i_size; /* Size of the file */
257 unsigned long long start; /* Start position */
258 atomic64_t issued_to; /* Write issuer folio cursor */
259 unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */
260 unsigned long long collected_to; /* Point we've collected to */
261 unsigned long long cleaned_to; /* Position we've cleaned folios to */
262 pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
263 refcount_t ref;
264 unsigned long flags;
265#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */
266#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
267#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
268#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
269#define NETFS_RREQ_FAILED 4 /* The request failed */
270#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
271#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
272#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
273#define NETFS_RREQ_BLOCKED 10 /* We blocked */
274#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
275#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
276#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
277#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
278 * write to cache on read */
279 const struct netfs_request_ops *netfs_ops;
280 void (*cleanup)(struct netfs_io_request *req);
281};
282
283/*
284 * Operations the network filesystem can/must provide to the helpers.
285 */
286struct netfs_request_ops {
287 mempool_t *request_pool;
288 mempool_t *subrequest_pool;
289 int (*init_request)(struct netfs_io_request *rreq, struct file *file);
290 void (*free_request)(struct netfs_io_request *rreq);
291 void (*free_subrequest)(struct netfs_io_subrequest *rreq);
292
293 /* Read request handling */
294 void (*expand_readahead)(struct netfs_io_request *rreq);
295 bool (*clamp_length)(struct netfs_io_subrequest *subreq);
296 void (*issue_read)(struct netfs_io_subrequest *subreq);
297 bool (*is_still_valid)(struct netfs_io_request *rreq);
298 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
299 struct folio **foliop, void **_fsdata);
300 void (*done)(struct netfs_io_request *rreq);
301
302 /* Modification handling */
303 void (*update_i_size)(struct inode *inode, loff_t i_size);
304 void (*post_modify)(struct inode *inode);
305
306 /* Write request handling */
307 void (*begin_writeback)(struct netfs_io_request *wreq);
308 void (*prepare_write)(struct netfs_io_subrequest *subreq);
309 void (*issue_write)(struct netfs_io_subrequest *subreq);
310 void (*retry_request)(struct netfs_io_request *wreq, struct netfs_io_stream *stream);
311 void (*invalidate_cache)(struct netfs_io_request *wreq);
312};
313
/*
 * Policy selector passed to the cache read operation, saying how a hole
 * encountered while reading is to be treated.
 * NOTE(review): the per-value meanings are taken from the enumerator names
 * (ignore / clear / fail) - confirm against the cache backends.
 */
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_CLEAR,
	NETFS_READ_HOLE_FAIL,
};
322
323/*
324 * Table of operations for access to a cache.
325 */
326struct netfs_cache_ops {
327 /* End an operation */
328 void (*end_operation)(struct netfs_cache_resources *cres);
329
330 /* Read data from the cache */
331 int (*read)(struct netfs_cache_resources *cres,
332 loff_t start_pos,
333 struct iov_iter *iter,
334 enum netfs_read_from_hole read_hole,
335 netfs_io_terminated_t term_func,
336 void *term_func_priv);
337
338 /* Write data to the cache */
339 int (*write)(struct netfs_cache_resources *cres,
340 loff_t start_pos,
341 struct iov_iter *iter,
342 netfs_io_terminated_t term_func,
343 void *term_func_priv);
344
345 /* Write data to the cache from a netfs subrequest. */
346 void (*issue_write)(struct netfs_io_subrequest *subreq);
347
348 /* Expand readahead request */
349 void (*expand_readahead)(struct netfs_cache_resources *cres,
350 unsigned long long *_start,
351 unsigned long long *_len,
352 unsigned long long i_size);
353
354 /* Prepare a read operation, shortening it to a cached/uncached
355 * boundary as appropriate.
356 */
357 enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
358 unsigned long long i_size);
359
360 /* Prepare a write subrequest, working out if we're allowed to do it
361 * and finding out the maximum amount of data to gather before
362 * attempting to submit. If we're not permitted to do it, the
363 * subrequest should be marked failed.
364 */
365 void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq);
366
367 /* Prepare a write operation, working out what part of the write we can
368 * actually do.
369 */
370 int (*prepare_write)(struct netfs_cache_resources *cres,
371 loff_t *_start, size_t *_len, size_t upper_len,
372 loff_t i_size, bool no_space_allocated_yet);
373
374 /* Prepare an on-demand read operation, shortening it to a cached/uncached
375 * boundary as appropriate.
376 */
377 enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres,
378 loff_t start, size_t *_len,
379 loff_t i_size,
380 unsigned long *_flags, ino_t ino);
381
382 /* Query the occupancy of the cache in a region, returning where the
383 * next chunk of data starts and how long it is.
384 */
385 int (*query_occupancy)(struct netfs_cache_resources *cres,
386 loff_t start, size_t len, size_t granularity,
387 loff_t *_data_start, size_t *_data_len);
388};
389
/*
 * High-level read API: unbuffered reads (plus a variant suffixed _locked),
 * buffered reads and the general file read entry point.
 */
ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb,
					  struct iov_iter *iter);
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb,
				   struct iov_iter *iter);
ssize_t netfs_buffered_read_iter(struct kiocb *iocb,
				 struct iov_iter *iter);
ssize_t netfs_file_read_iter(struct kiocb *iocb,
			     struct iov_iter *iter);
395
/*
 * High-level write API.  Three of these additionally take the netfs_group
 * to be passed along with the write.
 */
ssize_t netfs_perform_write(struct kiocb *iocb,
			    struct iov_iter *iter,
			    struct netfs_group *netfs_group);
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb,
					 struct iov_iter *from,
					 struct netfs_group *netfs_group);
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb,
				    struct iov_iter *from);
ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb,
					   struct iov_iter *iter,
					   struct netfs_group *netfs_group);
ssize_t netfs_file_write_iter(struct kiocb *iocb,
			      struct iov_iter *from);
405
406/* Address operations API */
407struct readahead_control;
408void netfs_readahead(struct readahead_control *);
409int netfs_read_folio(struct file *, struct folio *);
410int netfs_write_begin(struct netfs_inode *, struct file *,
411 struct address_space *, loff_t pos, unsigned int len,
412 struct folio **, void **fsdata);
413int netfs_writepages(struct address_space *mapping,
414 struct writeback_control *wbc);
415bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
416int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
417void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
418void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
419bool netfs_release_folio(struct folio *folio, gfp_t gfp);
420
421/* VMA operations API. */
422vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
423
424/* (Sub)request management API. */
425void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
426void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
427 enum netfs_sreq_ref_trace what);
428void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
429 bool was_async, enum netfs_sreq_ref_trace what);
430ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
431 struct iov_iter *new,
432 iov_iter_extraction_t extraction_flags);
433size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
434 size_t max_size, size_t max_segs);
435void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq);
436void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
437 bool was_async);
438void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
439
/*
 * Paired start/end calls bracketing read, write and direct I/O on an inode.
 * The start calls return an int, the end calls return nothing.
 */
int  netfs_start_io_read(struct inode *inode);
void netfs_end_io_read(struct inode *inode);

int  netfs_start_io_write(struct inode *inode);
void netfs_end_io_write(struct inode *inode);

int  netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);
446
447/**
448 * netfs_inode - Get the netfs inode context from the inode
449 * @inode: The inode to query
450 *
451 * Get the netfs lib inode context from the network filesystem's inode. The
452 * context struct is expected to directly follow on from the VFS inode struct.
453 */
454static inline struct netfs_inode *netfs_inode(struct inode *inode)
455{
456 return container_of(inode, struct netfs_inode, inode);
457}
458
459/**
460 * netfs_inode_init - Initialise a netfslib inode context
461 * @ctx: The netfs inode to initialise
462 * @ops: The netfs's operations list
463 * @use_zero_point: True to use the zero_point read optimisation
464 *
465 * Initialise the netfs library context struct. This is expected to follow on
466 * directly from the VFS inode struct.
467 */
468static inline void netfs_inode_init(struct netfs_inode *ctx,
469 const struct netfs_request_ops *ops,
470 bool use_zero_point)
471{
472 ctx->ops = ops;
473 ctx->remote_i_size = i_size_read(&ctx->inode);
474 ctx->zero_point = LLONG_MAX;
475 ctx->flags = 0;
476 atomic_set(&ctx->io_count, 0);
477#if IS_ENABLED(CONFIG_FSCACHE)
478 ctx->cache = NULL;
479#endif
480 mutex_init(&ctx->wb_lock);
481 /* ->releasepage() drives zero_point */
482 if (use_zero_point) {
483 ctx->zero_point = ctx->remote_i_size;
484 mapping_set_release_always(ctx->inode.i_mapping);
485 }
486}
487
488/**
489 * netfs_resize_file - Note that a file got resized
490 * @ctx: The netfs inode being resized
491 * @new_i_size: The new file size
492 * @changed_on_server: The change was applied to the server
493 *
494 * Inform the netfs lib that a file got resized so that it can adjust its state.
495 */
496static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
497 bool changed_on_server)
498{
499 if (changed_on_server)
500 ctx->remote_i_size = new_i_size;
501 if (new_i_size < ctx->zero_point)
502 ctx->zero_point = new_i_size;
503}
504
505/**
506 * netfs_i_cookie - Get the cache cookie from the inode
507 * @ctx: The netfs inode to query
508 *
509 * Get the caching cookie (if enabled) from the network filesystem's inode.
510 */
511static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
512{
513#if IS_ENABLED(CONFIG_FSCACHE)
514 return ctx->cache;
515#else
516 return NULL;
517#endif
518}
519
520/**
521 * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
522 * @inode: The netfs inode to wait on
523 *
524 * Wait for outstanding I/O requests of any type to complete. This is intended
525 * to be called from inode eviction routines. This makes sure that any
526 * resources held by those requests are cleaned up before we let the inode get
527 * cleaned up.
528 */
529static inline void netfs_wait_for_outstanding_io(struct inode *inode)
530{
531 struct netfs_inode *ictx = netfs_inode(inode);
532
533 wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
534}
535
536#endif /* _LINUX_NETFS_H */