Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.10.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull netfs updates from Christian Brauner:
"This reworks the netfslib writeback implementation so that pages read
from the cache are written to the cache through ->writepages(),
thereby allowing the fscache page flag to be retired.

The reworking also:

- builds on top of the new writeback_iter() infrastructure

- makes it possible to use vectored write RPCs, as discontiguous
streams of pages can be accommodated

- makes it easier to do simultaneous content crypto and stream
division

- provides support for retrying writes and re-dividing a stream

- replaces the ->launder_folio() op, so that ->writepages() is used
instead

- uses mempools to allocate the netfs_io_request and
netfs_io_subrequest structs to avoid allocation failure in the
writeback path

Some code that uses the fscache page flag is retained for
compatibility purposes with nfs and ceph. The code is switched to
using the synonymous private_2 label instead and marked with
deprecation comments.

The merge commit contains additional details on the new algorithm that
I've left out here, as including them would probably be excessive.

On top of the netfslib infrastructure, this contains the work to
convert cifs over to netfslib."

* tag 'vfs-6.10.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (38 commits)
cifs: Enable large folio support
cifs: Remove some code that's no longer used, part 3
cifs: Remove some code that's no longer used, part 2
cifs: Remove some code that's no longer used, part 1
cifs: Cut over to using netfslib
cifs: Implement netfslib hooks
cifs: Make add_credits_and_wake_if() clear deducted credits
cifs: Add mempools for cifs_io_request and cifs_io_subrequest structs
cifs: Set zero_point in the copy_file_range() and remap_file_range()
cifs: Move cifs_loose_read_iter() and cifs_file_write_iter() to file.c
cifs: Replace the writedata replay bool with a netfs sreq flag
cifs: Make wait_mtu_credits take size_t args
cifs: Use more fields from netfs_io_subrequest
cifs: Replace cifs_writedata with a wrapper around netfs_io_subrequest
cifs: Replace cifs_readdata with a wrapper around netfs_io_subrequest
cifs: Use alternative invalidation to using launder_folio
netfs, afs: Use writeback retry to deal with alternate keys
netfs: Miscellaneous tidy ups
netfs: Remove the old writeback code
netfs: Cut over to using new writeback code
...

+3322 -4612
+38 -28
fs/9p/vfs_addr.c
··· 26 26 #include "cache.h" 27 27 #include "fid.h" 28 28 29 - static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq) 29 + /* 30 + * Writeback calls this when it finds a folio that needs uploading. This isn't 31 + * called if writeback only has copy-to-cache to deal with. 32 + */ 33 + static void v9fs_begin_writeback(struct netfs_io_request *wreq) 34 + { 35 + struct p9_fid *fid; 36 + 37 + fid = v9fs_fid_find_inode(wreq->inode, true, INVALID_UID, true); 38 + if (!fid) { 39 + WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n", 40 + wreq->inode->i_ino); 41 + return; 42 + } 43 + 44 + wreq->wsize = fid->clnt->msize - P9_IOHDRSZ; 45 + if (fid->iounit) 46 + wreq->wsize = min(wreq->wsize, fid->iounit); 47 + wreq->netfs_priv = fid; 48 + wreq->io_streams[0].avail = true; 49 + } 50 + 51 + /* 52 + * Issue a subrequest to write to the server. 53 + */ 54 + static void v9fs_issue_write(struct netfs_io_subrequest *subreq) 30 55 { 31 56 struct p9_fid *fid = subreq->rreq->netfs_priv; 32 57 int err, len; 33 58 34 - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 35 59 len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); 36 60 netfs_write_subrequest_terminated(subreq, len ?: err, false); 37 - } 38 - 39 - static void v9fs_upload_to_server_worker(struct work_struct *work) 40 - { 41 - struct netfs_io_subrequest *subreq = 42 - container_of(work, struct netfs_io_subrequest, work); 43 - 44 - v9fs_upload_to_server(subreq); 45 - } 46 - 47 - /* 48 - * Set up write requests for a writeback slice. We need to add a write request 49 - * for each write we want to make. 
50 - */ 51 - static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) 52 - { 53 - struct netfs_io_subrequest *subreq; 54 - 55 - subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, 56 - start, len, v9fs_upload_to_server_worker); 57 - if (subreq) 58 - netfs_queue_write_request(subreq); 59 61 } 60 62 61 63 /** ··· 89 87 { 90 88 struct p9_fid *fid; 91 89 bool writing = (rreq->origin == NETFS_READ_FOR_WRITE || 92 - rreq->origin == NETFS_WRITEBACK || 93 90 rreq->origin == NETFS_WRITETHROUGH || 94 - rreq->origin == NETFS_LAUNDER_WRITE || 95 91 rreq->origin == NETFS_UNBUFFERED_WRITE || 96 92 rreq->origin == NETFS_DIO_WRITE); 93 + 94 + if (rreq->origin == NETFS_WRITEBACK) 95 + return 0; /* We don't get the write handle until we find we 96 + * have actually dirty data and not just 97 + * copy-to-cache data. 98 + */ 97 99 98 100 if (file) { 99 101 fid = file->private_data; ··· 109 103 if (!fid) 110 104 goto no_fid; 111 105 } 106 + 107 + rreq->wsize = fid->clnt->msize - P9_IOHDRSZ; 108 + if (fid->iounit) 109 + rreq->wsize = min(rreq->wsize, fid->iounit); 112 110 113 111 /* we might need to read from a fid that was opened write-only 114 112 * for read-modify-write of page cache, use the writeback fid ··· 142 132 .init_request = v9fs_init_request, 143 133 .free_request = v9fs_free_request, 144 134 .issue_read = v9fs_issue_read, 145 - .create_write_requests = v9fs_create_write_requests, 135 + .begin_writeback = v9fs_begin_writeback, 136 + .issue_write = v9fs_issue_write, 146 137 }; 147 138 148 139 const struct address_space_operations v9fs_addr_operations = { ··· 152 141 .dirty_folio = netfs_dirty_folio, 153 142 .release_folio = netfs_release_folio, 154 143 .invalidate_folio = netfs_invalidate_folio, 155 - .launder_folio = netfs_launder_folio, 156 144 .direct_IO = noop_direct_IO, 157 145 .writepages = netfs_writepages, 158 146 };
+5 -3
fs/afs/file.c
··· 54 54 .read_folio = netfs_read_folio, 55 55 .readahead = netfs_readahead, 56 56 .dirty_folio = netfs_dirty_folio, 57 - .launder_folio = netfs_launder_folio, 58 57 .release_folio = netfs_release_folio, 59 58 .invalidate_folio = netfs_invalidate_folio, 60 59 .migrate_folio = filemap_migrate_folio, ··· 353 354 if (file) 354 355 rreq->netfs_priv = key_get(afs_file_key(file)); 355 356 rreq->rsize = 256 * 1024; 356 - rreq->wsize = 256 * 1024; 357 + rreq->wsize = 256 * 1024 * 1024; 357 358 return 0; 358 359 } 359 360 ··· 368 369 static void afs_free_request(struct netfs_io_request *rreq) 369 370 { 370 371 key_put(rreq->netfs_priv); 372 + afs_put_wb_key(rreq->netfs_priv2); 371 373 } 372 374 373 375 static void afs_update_i_size(struct inode *inode, loff_t new_i_size) ··· 400 400 .issue_read = afs_issue_read, 401 401 .update_i_size = afs_update_i_size, 402 402 .invalidate_cache = afs_netfs_invalidate_cache, 403 - .create_write_requests = afs_create_write_requests, 403 + .begin_writeback = afs_begin_writeback, 404 + .prepare_write = afs_prepare_write, 405 + .issue_write = afs_issue_write, 404 406 }; 405 407 406 408 static void afs_add_open_mmap(struct afs_vnode *vnode)
+4 -2
fs/afs/internal.h
··· 916 916 loff_t pos; 917 917 loff_t size; 918 918 loff_t i_size; 919 - bool laundering; /* Laundering page, PG_writeback not set */ 920 919 } store; 921 920 struct { 922 921 struct iattr *attr; ··· 1598 1599 /* 1599 1600 * write.c 1600 1601 */ 1602 + void afs_prepare_write(struct netfs_io_subrequest *subreq); 1603 + void afs_issue_write(struct netfs_io_subrequest *subreq); 1604 + void afs_begin_writeback(struct netfs_io_request *wreq); 1605 + void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream); 1601 1606 extern int afs_writepages(struct address_space *, struct writeback_control *); 1602 1607 extern int afs_fsync(struct file *, loff_t, loff_t, int); 1603 1608 extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); 1604 1609 extern void afs_prune_wb_keys(struct afs_vnode *); 1605 - void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len); 1606 1610 1607 1611 /* 1608 1612 * xattr.c
+2 -2
fs/afs/validation.c
··· 365 365 * written back in a regular file and completely discard the pages in a 366 366 * directory or symlink */ 367 367 if (S_ISREG(vnode->netfs.inode.i_mode)) 368 - invalidate_remote_inode(&vnode->netfs.inode); 368 + filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX); 369 369 else 370 - invalidate_inode_pages2(vnode->netfs.inode.i_mapping); 370 + filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX); 371 371 } 372 372 373 373 /*
+100 -93
fs/afs/write.c
··· 29 29 30 30 /* 31 31 * Find a key to use for the writeback. We cached the keys used to author the 32 - * writes on the vnode. *_wbk will contain the last writeback key used or NULL 33 - * and we need to start from there if it's set. 32 + * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key 33 + * record used or NULL and we need to start from there if it's set. 34 + * wreq->netfs_priv will be set to the key itself or NULL. 34 35 */ 35 - static int afs_get_writeback_key(struct afs_vnode *vnode, 36 - struct afs_wb_key **_wbk) 36 + static void afs_get_writeback_key(struct netfs_io_request *wreq) 37 37 { 38 - struct afs_wb_key *wbk = NULL; 39 - struct list_head *p; 40 - int ret = -ENOKEY, ret2; 38 + struct afs_wb_key *wbk, *old = wreq->netfs_priv2; 39 + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); 40 + 41 + key_put(wreq->netfs_priv); 42 + wreq->netfs_priv = NULL; 43 + wreq->netfs_priv2 = NULL; 41 44 42 45 spin_lock(&vnode->wb_lock); 43 - if (*_wbk) 44 - p = (*_wbk)->vnode_link.next; 46 + if (old) 47 + wbk = list_next_entry(old, vnode_link); 45 48 else 46 - p = vnode->wb_keys.next; 49 + wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link); 47 50 48 - while (p != &vnode->wb_keys) { 49 - wbk = list_entry(p, struct afs_wb_key, vnode_link); 51 + list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) { 50 52 _debug("wbk %u", key_serial(wbk->key)); 51 - ret2 = key_validate(wbk->key); 52 - if (ret2 == 0) { 53 + if (key_validate(wbk->key) == 0) { 53 54 refcount_inc(&wbk->usage); 55 + wreq->netfs_priv = key_get(wbk->key); 56 + wreq->netfs_priv2 = wbk; 54 57 _debug("USE WB KEY %u", key_serial(wbk->key)); 55 58 break; 56 59 } 57 - 58 - wbk = NULL; 59 - if (ret == -ENOKEY) 60 - ret = ret2; 61 - p = p->next; 62 60 } 63 61 64 62 spin_unlock(&vnode->wb_lock); 65 - if (*_wbk) 66 - afs_put_wb_key(*_wbk); 67 - *_wbk = wbk; 68 - return 0; 63 + 64 + afs_put_wb_key(old); 69 65 } 70 66 71 67 static void afs_store_data_success(struct 
afs_operation *op) ··· 71 75 op->ctime = op->file[0].scb.status.mtime_client; 72 76 afs_vnode_commit_status(op, &op->file[0]); 73 77 if (!afs_op_error(op)) { 74 - if (!op->store.laundering) 75 - afs_pages_written_back(vnode, op->store.pos, op->store.size); 78 + afs_pages_written_back(vnode, op->store.pos, op->store.size); 76 79 afs_stat_v(vnode, n_stores); 77 80 atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes); 78 81 } ··· 84 89 }; 85 90 86 91 /* 87 - * write to a file 92 + * Prepare a subrequest to write to the server. This sets the max_len 93 + * parameter. 88 94 */ 89 - static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos, 90 - bool laundering) 95 + void afs_prepare_write(struct netfs_io_subrequest *subreq) 91 96 { 97 + //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) 98 + // subreq->max_len = 512 * 1024; 99 + //else 100 + subreq->max_len = 256 * 1024 * 1024; 101 + } 102 + 103 + /* 104 + * Issue a subrequest to write to the server. 
105 + */ 106 + static void afs_issue_write_worker(struct work_struct *work) 107 + { 108 + struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work); 109 + struct netfs_io_request *wreq = subreq->rreq; 92 110 struct afs_operation *op; 93 - struct afs_wb_key *wbk = NULL; 94 - loff_t size = iov_iter_count(iter); 111 + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); 112 + unsigned long long pos = subreq->start + subreq->transferred; 113 + size_t len = subreq->len - subreq->transferred; 95 114 int ret = -ENOKEY; 96 115 97 - _enter("%s{%llx:%llu.%u},%llx,%llx", 116 + _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx", 117 + wreq->debug_id, subreq->debug_index, 98 118 vnode->volume->name, 99 119 vnode->fid.vid, 100 120 vnode->fid.vnode, 101 121 vnode->fid.unique, 102 - size, pos); 122 + pos, len); 103 123 104 - ret = afs_get_writeback_key(vnode, &wbk); 105 - if (ret) { 106 - _leave(" = %d [no keys]", ret); 107 - return ret; 108 - } 124 + #if 0 // Error injection 125 + if (subreq->debug_index == 3) 126 + return netfs_write_subrequest_terminated(subreq, -ENOANO, false); 109 127 110 - op = afs_alloc_operation(wbk->key, vnode->volume); 111 - if (IS_ERR(op)) { 112 - afs_put_wb_key(wbk); 113 - return -ENOMEM; 128 + if (!test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) { 129 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 130 + return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); 114 131 } 132 + #endif 133 + 134 + op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); 135 + if (IS_ERR(op)) 136 + return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); 115 137 116 138 afs_op_set_vnode(op, 0, vnode); 117 - op->file[0].dv_delta = 1; 139 + op->file[0].dv_delta = 1; 118 140 op->file[0].modification = true; 119 - op->store.pos = pos; 120 - op->store.size = size; 121 - op->store.laundering = laundering; 122 - op->flags |= AFS_OPERATION_UNINTR; 123 - op->ops = &afs_store_data_operation; 141 + op->store.pos = pos; 142 + op->store.size = 
len; 143 + op->flags |= AFS_OPERATION_UNINTR; 144 + op->ops = &afs_store_data_operation; 124 145 125 - try_next_key: 126 146 afs_begin_vnode_operation(op); 127 147 128 - op->store.write_iter = iter; 129 - op->store.i_size = max(pos + size, vnode->netfs.remote_i_size); 130 - op->mtime = inode_get_mtime(&vnode->netfs.inode); 148 + op->store.write_iter = &subreq->io_iter; 149 + op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size); 150 + op->mtime = inode_get_mtime(&vnode->netfs.inode); 131 151 132 152 afs_wait_for_operation(op); 133 - 134 - switch (afs_op_error(op)) { 153 + ret = afs_put_operation(op); 154 + switch (ret) { 135 155 case -EACCES: 136 156 case -EPERM: 137 157 case -ENOKEY: 138 158 case -EKEYEXPIRED: 139 159 case -EKEYREJECTED: 140 160 case -EKEYREVOKED: 141 - _debug("next"); 142 - 143 - ret = afs_get_writeback_key(vnode, &wbk); 144 - if (ret == 0) { 145 - key_put(op->key); 146 - op->key = key_get(wbk->key); 147 - goto try_next_key; 148 - } 161 + /* If there are more keys we can try, use the retry algorithm 162 + * to rotate the keys. 163 + */ 164 + if (wreq->netfs_priv2) 165 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 149 166 break; 150 167 } 151 168 152 - afs_put_wb_key(wbk); 153 - _leave(" = %d", afs_op_error(op)); 154 - return afs_put_operation(op); 169 + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); 155 170 } 156 171 157 - static void afs_upload_to_server(struct netfs_io_subrequest *subreq) 172 + void afs_issue_write(struct netfs_io_subrequest *subreq) 158 173 { 159 - struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); 160 - ssize_t ret; 161 - 162 - _enter("%x[%x],%zx", 163 - subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count); 164 - 165 - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 166 - ret = afs_store_data(vnode, &subreq->io_iter, subreq->start, 167 - subreq->rreq->origin == NETFS_LAUNDER_WRITE); 168 - netfs_write_subrequest_terminated(subreq, ret < 0 ? 
ret : subreq->len, 169 - false); 170 - } 171 - 172 - static void afs_upload_to_server_worker(struct work_struct *work) 173 - { 174 - struct netfs_io_subrequest *subreq = 175 - container_of(work, struct netfs_io_subrequest, work); 176 - 177 - afs_upload_to_server(subreq); 174 + subreq->work.func = afs_issue_write_worker; 175 + if (!queue_work(system_unbound_wq, &subreq->work)) 176 + WARN_ON_ONCE(1); 178 177 } 179 178 180 179 /* 181 - * Set up write requests for a writeback slice. We need to add a write request 182 - * for each write we want to make. 180 + * Writeback calls this when it finds a folio that needs uploading. This isn't 181 + * called if writeback only has copy-to-cache to deal with. 183 182 */ 184 - void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) 183 + void afs_begin_writeback(struct netfs_io_request *wreq) 185 184 { 186 - struct netfs_io_subrequest *subreq; 185 + afs_get_writeback_key(wreq); 186 + wreq->io_streams[0].avail = true; 187 + } 187 188 188 - _enter("%x,%llx-%llx", wreq->debug_id, start, start + len); 189 + /* 190 + * Prepare to retry the writes in request. Use this to try rotating the 191 + * available writeback keys. 192 + */ 193 + void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream) 194 + { 195 + struct netfs_io_subrequest *subreq = 196 + list_first_entry(&stream->subrequests, 197 + struct netfs_io_subrequest, rreq_link); 189 198 190 - subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, 191 - start, len, afs_upload_to_server_worker); 192 - if (subreq) 193 - netfs_queue_write_request(subreq); 199 + switch (subreq->error) { 200 + case -EACCES: 201 + case -EPERM: 202 + case -ENOKEY: 203 + case -EKEYEXPIRED: 204 + case -EKEYREJECTED: 205 + case -EKEYREVOKED: 206 + afs_get_writeback_key(wreq); 207 + if (!wreq->netfs_priv) 208 + stream->failed = true; 209 + break; 210 + } 194 211 } 195 212 196 213 /*
+75 -1
fs/cachefiles/io.c
··· 9 9 #include <linux/slab.h> 10 10 #include <linux/file.h> 11 11 #include <linux/uio.h> 12 + #include <linux/bio.h> 12 13 #include <linux/falloc.h> 13 14 #include <linux/sched/mm.h> 14 15 #include <trace/events/fscache.h> ··· 494 493 * boundary as appropriate. 495 494 */ 496 495 static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq, 497 - loff_t i_size) 496 + unsigned long long i_size) 498 497 { 499 498 return cachefiles_do_prepare_read(&subreq->rreq->cache_resources, 500 499 subreq->start, &subreq->len, i_size, ··· 623 622 return ret; 624 623 } 625 624 625 + static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq) 626 + { 627 + struct netfs_io_request *wreq = subreq->rreq; 628 + struct netfs_cache_resources *cres = &wreq->cache_resources; 629 + 630 + _enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start); 631 + 632 + subreq->max_len = ULONG_MAX; 633 + subreq->max_nr_segs = BIO_MAX_VECS; 634 + 635 + if (!cachefiles_cres_file(cres)) { 636 + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) 637 + return netfs_prepare_write_failed(subreq); 638 + if (!cachefiles_cres_file(cres)) 639 + return netfs_prepare_write_failed(subreq); 640 + } 641 + } 642 + 643 + static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) 644 + { 645 + struct netfs_io_request *wreq = subreq->rreq; 646 + struct netfs_cache_resources *cres = &wreq->cache_resources; 647 + struct cachefiles_object *object = cachefiles_cres_object(cres); 648 + struct cachefiles_cache *cache = object->volume->cache; 649 + const struct cred *saved_cred; 650 + size_t off, pre, post, len = subreq->len; 651 + loff_t start = subreq->start; 652 + int ret; 653 + 654 + _enter("W=%x[%x] %llx-%llx", 655 + wreq->debug_id, subreq->debug_index, start, start + len - 1); 656 + 657 + /* We need to start on the cache granularity boundary */ 658 + off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1); 659 + if (off) { 660 + pre = 
CACHEFILES_DIO_BLOCK_SIZE - off; 661 + if (pre >= len) { 662 + netfs_write_subrequest_terminated(subreq, len, false); 663 + return; 664 + } 665 + subreq->transferred += pre; 666 + start += pre; 667 + len -= pre; 668 + iov_iter_advance(&subreq->io_iter, pre); 669 + } 670 + 671 + /* We also need to end on the cache granularity boundary */ 672 + post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1); 673 + if (post) { 674 + len -= post; 675 + if (len == 0) { 676 + netfs_write_subrequest_terminated(subreq, post, false); 677 + return; 678 + } 679 + iov_iter_truncate(&subreq->io_iter, len); 680 + } 681 + 682 + cachefiles_begin_secure(cache, &saved_cred); 683 + ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres), 684 + &start, &len, len, true); 685 + cachefiles_end_secure(cache, saved_cred); 686 + if (ret < 0) { 687 + netfs_write_subrequest_terminated(subreq, ret, false); 688 + return; 689 + } 690 + 691 + cachefiles_write(&subreq->rreq->cache_resources, 692 + subreq->start, &subreq->io_iter, 693 + netfs_write_subrequest_terminated, subreq); 694 + } 695 + 626 696 /* 627 697 * Clean up an operation. 628 698 */ ··· 710 638 .end_operation = cachefiles_end_operation, 711 639 .read = cachefiles_read, 712 640 .write = cachefiles_write, 641 + .issue_write = cachefiles_issue_write, 713 642 .prepare_read = cachefiles_prepare_read, 714 643 .prepare_write = cachefiles_prepare_write, 644 + .prepare_write_subreq = cachefiles_prepare_write_subreq, 715 645 .prepare_ondemand_read = cachefiles_prepare_ondemand_read, 716 646 .query_occupancy = cachefiles_query_occupancy, 717 647 };
+4 -20
fs/ceph/addr.c
··· 193 193 * block, but do not exceed the file size, unless the original 194 194 * request already exceeds it. 195 195 */ 196 - new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); 196 + new_end = umin(round_up(end, lo->stripe_unit), rreq->i_size); 197 197 if (new_end > end && new_end <= rreq->start + max_len) 198 198 rreq->len = new_end - rreq->start; 199 199 ··· 498 498 }; 499 499 500 500 #ifdef CONFIG_CEPH_FSCACHE 501 - static void ceph_set_page_fscache(struct page *page) 502 - { 503 - set_page_fscache(page); 504 - } 505 - 506 501 static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) 507 502 { 508 503 struct inode *inode = priv; ··· 512 517 struct fscache_cookie *cookie = ceph_fscache_cookie(ci); 513 518 514 519 fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), 515 - ceph_fscache_write_terminated, inode, caching); 520 + ceph_fscache_write_terminated, inode, true, caching); 516 521 } 517 522 #else 518 - static inline void ceph_set_page_fscache(struct page *page) 519 - { 520 - } 521 - 522 523 static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) 523 524 { 524 525 } ··· 706 715 len = wlen; 707 716 708 717 set_page_writeback(page); 709 - if (caching) 710 - ceph_set_page_fscache(page); 711 718 ceph_fscache_write_to_cache(inode, page_off, len, caching); 712 719 713 720 if (IS_ENCRYPTED(inode)) { ··· 788 799 redirty_page_for_writepage(wbc, page); 789 800 return AOP_WRITEPAGE_ACTIVATE; 790 801 } 791 - 792 - wait_on_page_fscache(page); 793 802 794 803 err = writepage_nounlock(page, wbc); 795 804 if (err == -ERESTARTSYS) { ··· 1062 1075 unlock_page(page); 1063 1076 break; 1064 1077 } 1065 - if (PageWriteback(page) || PageFsCache(page)) { 1078 + if (PageWriteback(page)) { 1066 1079 if (wbc->sync_mode == WB_SYNC_NONE) { 1067 1080 doutc(cl, "%p under writeback\n", page); 1068 1081 unlock_page(page); ··· 1070 1083 } 1071 1084 doutc(cl, "waiting on writeback 
%p\n", page); 1072 1085 wait_on_page_writeback(page); 1073 - wait_on_page_fscache(page); 1074 1086 } 1075 1087 1076 1088 if (!clear_page_dirty_for_io(page)) { ··· 1254 1268 } 1255 1269 1256 1270 set_page_writeback(page); 1257 - if (caching) 1258 - ceph_set_page_fscache(page); 1259 1271 len += thp_size(page); 1260 1272 } 1261 1273 ceph_fscache_write_to_cache(inode, offset, len, caching); ··· 1497 1513 if (r < 0) 1498 1514 return r; 1499 1515 1500 - folio_wait_fscache(folio); 1516 + folio_wait_private_2(folio); /* [DEPRECATED] */ 1501 1517 WARN_ON_ONCE(!folio_test_locked(folio)); 1502 1518 *pagep = &folio->page; 1503 1519 return 0;
+2
fs/ceph/inode.c
··· 577 577 578 578 /* Set parameters for the netfs library */ 579 579 netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); 580 + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ 581 + __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); 580 582 581 583 spin_lock_init(&ci->i_ceph_lock); 582 584
+2 -1
fs/netfs/Makefile
··· 11 11 main.o \ 12 12 misc.o \ 13 13 objects.o \ 14 - output.o 14 + write_collect.o \ 15 + write_issue.o 15 16 16 17 netfs-$(CONFIG_NETFS_STATS) += stats.o 17 18
+26 -14
fs/netfs/buffered_read.c
··· 10 10 #include "internal.h" 11 11 12 12 /* 13 - * Unlock the folios in a read operation. We need to set PG_fscache on any 13 + * Unlock the folios in a read operation. We need to set PG_writeback on any 14 14 * folios we're going to write back before we unlock them. 15 + * 16 + * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use 17 + * PG_private_2 and do a direct write to the cache from here instead. 15 18 */ 16 19 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) 17 20 { ··· 51 48 xas_for_each(&xas, folio, last_page) { 52 49 loff_t pg_end; 53 50 bool pg_failed = false; 54 - bool folio_started; 51 + bool wback_to_cache = false; 52 + bool folio_started = false; 55 53 56 54 if (xas_retry(&xas, folio)) 57 55 continue; 58 56 59 57 pg_end = folio_pos(folio) + folio_size(folio) - 1; 60 58 61 - folio_started = false; 62 59 for (;;) { 63 60 loff_t sreq_end; 64 61 ··· 66 63 pg_failed = true; 67 64 break; 68 65 } 69 - if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { 70 - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 71 - folio_start_fscache(folio); 72 - folio_started = true; 66 + if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { 67 + if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, 68 + &subreq->flags)) { 69 + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 70 + folio_start_private_2(folio); 71 + folio_started = true; 72 + } 73 + } else { 74 + wback_to_cache |= 75 + test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 73 76 } 74 77 pg_failed |= subreq_failed; 75 78 sreq_end = subreq->start + subreq->len - 1; ··· 107 98 kfree(finfo); 108 99 } 109 100 folio_mark_uptodate(folio); 101 + if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) { 102 + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 103 + folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE); 104 + filemap_dirty_folio(folio->mapping, folio); 105 + } 110 106 } 111 107 112 108 if 
(!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { ··· 130 116 } 131 117 132 118 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, 133 - loff_t *_start, size_t *_len, loff_t i_size) 119 + unsigned long long *_start, 120 + unsigned long long *_len, 121 + unsigned long long i_size) 134 122 { 135 123 struct netfs_cache_resources *cres = &rreq->cache_resources; 136 124 ··· 282 266 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 283 267 goto discard; 284 268 285 - netfs_stat(&netfs_n_rh_readpage); 269 + netfs_stat(&netfs_n_rh_read_folio); 286 270 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); 287 271 288 272 /* Set up the output buffer */ ··· 466 450 if (!netfs_is_cache_enabled(ctx) && 467 451 netfs_skip_folio_read(folio, pos, len, false)) { 468 452 netfs_stat(&netfs_n_rh_write_zskip); 469 - goto have_folio_no_wait; 453 + goto have_folio; 470 454 } 471 455 472 456 rreq = netfs_alloc_request(mapping, file, ··· 507 491 netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 508 492 509 493 have_folio: 510 - ret = folio_wait_fscache_killable(folio); 511 - if (ret < 0) 512 - goto error; 513 - have_folio_no_wait: 514 494 *_folio = folio; 515 495 _leave(" = 0"); 516 496 return 0;
+77 -754
fs/netfs/buffered_write.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 - /* Network filesystem high-level write support. 2 + /* Network filesystem high-level buffered write support. 3 3 * 4 4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 5 * Written by David Howells (dhowells@redhat.com) ··· 26 26 NETFS_FLUSH_CONTENT, /* Flush incompatible content. */ 27 27 }; 28 28 29 - static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq); 30 - 31 29 static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) 32 30 { 33 - if (netfs_group && !folio_get_private(folio)) 34 - folio_attach_private(folio, netfs_get_group(netfs_group)); 35 - } 31 + void *priv = folio_get_private(folio); 36 32 37 - #if IS_ENABLED(CONFIG_FSCACHE) 38 - static void netfs_folio_start_fscache(bool caching, struct folio *folio) 39 - { 40 - if (caching) 41 - folio_start_fscache(folio); 33 + if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE)) 34 + folio_attach_private(folio, netfs_get_group(netfs_group)); 35 + else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) 36 + folio_detach_private(folio); 42 37 } 43 - #else 44 - static void netfs_folio_start_fscache(bool caching, struct folio *folio) 45 - { 46 - } 47 - #endif 48 38 49 39 /* 50 40 * Decide how we should modify a folio. 
We might be attempting to do ··· 53 63 bool maybe_trouble) 54 64 { 55 65 struct netfs_folio *finfo = netfs_folio_info(folio); 66 + struct netfs_group *group = netfs_folio_group(folio); 56 67 loff_t pos = folio_file_pos(folio); 57 68 58 69 _enter(""); 59 70 60 - if (netfs_folio_group(folio) != netfs_group) 71 + if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) 61 72 return NETFS_FLUSH_CONTENT; 62 73 63 74 if (folio_test_uptodate(folio)) ··· 72 81 73 82 if (file->f_mode & FMODE_READ) 74 83 goto no_write_streaming; 75 - if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) 76 - goto no_write_streaming; 77 84 78 85 if (netfs_is_cache_enabled(ctx)) { 79 86 /* We don't want to get a streaming write on a file that loses 80 87 * caching service temporarily because the backing store got 81 88 * culled. 82 89 */ 83 - if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) 84 - set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags); 85 90 goto no_write_streaming; 86 91 } 87 92 ··· 117 130 mapping_gfp_mask(mapping)); 118 131 } 119 132 133 + /* 134 + * Update i_size and estimate the update to i_blocks to reflect the additional 135 + * data written into the pagecache until we can find out from the server what 136 + * the values actually are. 
137 + */ 138 + static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, 139 + loff_t i_size, loff_t pos, size_t copied) 140 + { 141 + blkcnt_t add; 142 + size_t gap; 143 + 144 + if (ctx->ops->update_i_size) { 145 + ctx->ops->update_i_size(inode, pos); 146 + return; 147 + } 148 + 149 + i_size_write(inode, pos); 150 + #if IS_ENABLED(CONFIG_FSCACHE) 151 + fscache_update_cookie(ctx->cache, NULL, &pos); 152 + #endif 153 + 154 + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); 155 + if (copied > gap) { 156 + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); 157 + 158 + inode->i_blocks = min_t(blkcnt_t, 159 + DIV_ROUND_UP(pos, SECTOR_SIZE), 160 + inode->i_blocks + add); 161 + } 162 + } 163 + 120 164 /** 121 165 * netfs_perform_write - Copy data into the pagecache. 122 166 * @iocb: The operation parameters ··· 178 160 }; 179 161 struct netfs_io_request *wreq = NULL; 180 162 struct netfs_folio *finfo; 181 - struct folio *folio; 163 + struct folio *folio, *writethrough = NULL; 182 164 enum netfs_how_to_modify howto; 183 165 enum netfs_folio_trace trace; 184 166 unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC; ··· 207 189 } 208 190 if (!is_sync_kiocb(iocb)) 209 191 wreq->iocb = iocb; 210 - wreq->cleanup = netfs_cleanup_buffered_write; 192 + netfs_stat(&netfs_n_wh_writethrough); 193 + } else { 194 + netfs_stat(&netfs_n_wh_buffered_write); 211 195 } 212 196 213 197 do { ··· 249 229 flen = folio_size(folio); 250 230 offset = pos & (flen - 1); 251 231 part = min_t(size_t, flen - offset, part); 232 + 233 + /* Wait for writeback to complete. The writeback engine owns 234 + * the info in folio->private and may change it until it 235 + * removes the WB mark. 236 + */ 237 + if (folio_get_private(folio) && 238 + folio_wait_writeback_killable(folio)) { 239 + ret = written ? -EINTR : -ERESTARTSYS; 240 + goto error_folio_unlock; 241 + } 252 242 253 243 if (signal_pending(current)) { 254 244 ret = written ? 
-EINTR : -ERESTARTSYS; ··· 334 304 maybe_trouble = true; 335 305 iov_iter_revert(iter, copied); 336 306 copied = 0; 307 + folio_unlock(folio); 337 308 goto retry; 338 309 } 339 310 netfs_set_group(folio, netfs_group); ··· 382 351 trace_netfs_folio(folio, trace); 383 352 384 353 /* Update the inode size if we moved the EOF marker */ 385 - i_size = i_size_read(inode); 386 354 pos += copied; 387 - if (pos > i_size) { 388 - if (ctx->ops->update_i_size) { 389 - ctx->ops->update_i_size(inode, pos); 390 - } else { 391 - i_size_write(inode, pos); 392 - #if IS_ENABLED(CONFIG_FSCACHE) 393 - fscache_update_cookie(ctx->cache, NULL, &pos); 394 - #endif 395 - } 396 - } 355 + i_size = i_size_read(inode); 356 + if (pos > i_size) 357 + netfs_update_i_size(ctx, inode, i_size, pos, copied); 397 358 written += copied; 398 359 399 360 if (likely(!wreq)) { 400 361 folio_mark_dirty(folio); 362 + folio_unlock(folio); 401 363 } else { 402 - if (folio_test_dirty(folio)) 403 - /* Sigh. mmap. */ 404 - folio_clear_dirty_for_io(folio); 405 - /* We make multiple writes to the folio... 
*/ 406 - if (!folio_test_writeback(folio)) { 407 - folio_wait_fscache(folio); 408 - folio_start_writeback(folio); 409 - folio_start_fscache(folio); 410 - if (wreq->iter.count == 0) 411 - trace_netfs_folio(folio, netfs_folio_trace_wthru); 412 - else 413 - trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); 414 - } 415 - netfs_advance_writethrough(wreq, copied, 416 - offset + copied == flen); 364 + netfs_advance_writethrough(wreq, &wbc, folio, copied, 365 + offset + copied == flen, 366 + &writethrough); 367 + /* Folio unlocked */ 417 368 } 418 369 retry: 419 - folio_unlock(folio); 420 370 folio_put(folio); 421 371 folio = NULL; 422 372 ··· 405 393 } while (iov_iter_count(iter)); 406 394 407 395 out: 396 + if (likely(written) && ctx->ops->post_modify) 397 + ctx->ops->post_modify(inode); 398 + 408 399 if (unlikely(wreq)) { 409 - ret2 = netfs_end_writethrough(wreq, iocb); 400 + ret2 = netfs_end_writethrough(wreq, &wbc, writethrough); 410 401 wbc_detach_inode(&wbc); 411 402 if (ret2 == -EIOCBQUEUED) 412 403 return ret2; ··· 520 505 */ 521 506 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group) 522 507 { 508 + struct netfs_group *group; 523 509 struct folio *folio = page_folio(vmf->page); 524 510 struct file *file = vmf->vma->vm_file; 525 511 struct inode *inode = file_inode(file); 512 + struct netfs_inode *ictx = netfs_inode(inode); 526 513 vm_fault_t ret = VM_FAULT_RETRY; 527 514 int err; 528 515 ··· 532 515 533 516 sb_start_pagefault(inode->i_sb); 534 517 535 - if (folio_wait_writeback_killable(folio)) 536 - goto out; 537 - 538 518 if (folio_lock_killable(folio) < 0) 539 519 goto out; 520 + 521 + if (folio_wait_writeback_killable(folio)) { 522 + ret = VM_FAULT_LOCKED; 523 + goto out; 524 + } 540 525 541 526 /* Can we see a streaming write here? 
*/ 542 527 if (WARN_ON(!folio_test_uptodate(folio))) { ··· 546 527 goto out; 547 528 } 548 529 549 - if (netfs_folio_group(folio) != netfs_group) { 530 + group = netfs_folio_group(folio); 531 + if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { 550 532 folio_unlock(folio); 551 533 err = filemap_fdatawait_range(inode->i_mapping, 552 534 folio_pos(folio), ··· 571 551 trace_netfs_folio(folio, netfs_folio_trace_mkwrite); 572 552 netfs_set_group(folio, netfs_group); 573 553 file_update_time(file); 554 + if (ictx->ops->post_modify) 555 + ictx->ops->post_modify(inode); 574 556 ret = VM_FAULT_LOCKED; 575 557 out: 576 558 sb_end_pagefault(inode->i_sb); 577 559 return ret; 578 560 } 579 561 EXPORT_SYMBOL(netfs_page_mkwrite); 580 - 581 - /* 582 - * Kill all the pages in the given range 583 - */ 584 - static void netfs_kill_pages(struct address_space *mapping, 585 - loff_t start, loff_t len) 586 - { 587 - struct folio *folio; 588 - pgoff_t index = start / PAGE_SIZE; 589 - pgoff_t last = (start + len - 1) / PAGE_SIZE, next; 590 - 591 - _enter("%llx-%llx", start, start + len - 1); 592 - 593 - do { 594 - _debug("kill %lx (to %lx)", index, last); 595 - 596 - folio = filemap_get_folio(mapping, index); 597 - if (IS_ERR(folio)) { 598 - next = index + 1; 599 - continue; 600 - } 601 - 602 - next = folio_next_index(folio); 603 - 604 - trace_netfs_folio(folio, netfs_folio_trace_kill); 605 - folio_clear_uptodate(folio); 606 - if (folio_test_fscache(folio)) 607 - folio_end_fscache(folio); 608 - folio_end_writeback(folio); 609 - folio_lock(folio); 610 - generic_error_remove_folio(mapping, folio); 611 - folio_unlock(folio); 612 - folio_put(folio); 613 - 614 - } while (index = next, index <= last); 615 - 616 - _leave(""); 617 - } 618 - 619 - /* 620 - * Redirty all the pages in a given range. 
621 - */ 622 - static void netfs_redirty_pages(struct address_space *mapping, 623 - loff_t start, loff_t len) 624 - { 625 - struct folio *folio; 626 - pgoff_t index = start / PAGE_SIZE; 627 - pgoff_t last = (start + len - 1) / PAGE_SIZE, next; 628 - 629 - _enter("%llx-%llx", start, start + len - 1); 630 - 631 - do { 632 - _debug("redirty %llx @%llx", len, start); 633 - 634 - folio = filemap_get_folio(mapping, index); 635 - if (IS_ERR(folio)) { 636 - next = index + 1; 637 - continue; 638 - } 639 - 640 - next = folio_next_index(folio); 641 - trace_netfs_folio(folio, netfs_folio_trace_redirty); 642 - filemap_dirty_folio(mapping, folio); 643 - if (folio_test_fscache(folio)) 644 - folio_end_fscache(folio); 645 - folio_end_writeback(folio); 646 - folio_put(folio); 647 - } while (index = next, index <= last); 648 - 649 - balance_dirty_pages_ratelimited(mapping); 650 - 651 - _leave(""); 652 - } 653 - 654 - /* 655 - * Completion of write to server 656 - */ 657 - static void netfs_pages_written_back(struct netfs_io_request *wreq) 658 - { 659 - struct address_space *mapping = wreq->mapping; 660 - struct netfs_folio *finfo; 661 - struct netfs_group *group = NULL; 662 - struct folio *folio; 663 - pgoff_t last; 664 - int gcount = 0; 665 - 666 - XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE); 667 - 668 - _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 669 - 670 - rcu_read_lock(); 671 - 672 - last = (wreq->start + wreq->len - 1) / PAGE_SIZE; 673 - xas_for_each(&xas, folio, last) { 674 - WARN(!folio_test_writeback(folio), 675 - "bad %zx @%llx page %lx %lx\n", 676 - wreq->len, wreq->start, folio->index, last); 677 - 678 - if ((finfo = netfs_folio_info(folio))) { 679 - /* Streaming writes cannot be redirtied whilst under 680 - * writeback, so discard the streaming record. 
681 - */ 682 - folio_detach_private(folio); 683 - group = finfo->netfs_group; 684 - gcount++; 685 - trace_netfs_folio(folio, netfs_folio_trace_clear_s); 686 - kfree(finfo); 687 - } else if ((group = netfs_folio_group(folio))) { 688 - /* Need to detach the group pointer if the page didn't 689 - * get redirtied. If it has been redirtied, then it 690 - * must be within the same group. 691 - */ 692 - if (folio_test_dirty(folio)) { 693 - trace_netfs_folio(folio, netfs_folio_trace_redirtied); 694 - goto end_wb; 695 - } 696 - if (folio_trylock(folio)) { 697 - if (!folio_test_dirty(folio)) { 698 - folio_detach_private(folio); 699 - gcount++; 700 - trace_netfs_folio(folio, netfs_folio_trace_clear_g); 701 - } else { 702 - trace_netfs_folio(folio, netfs_folio_trace_redirtied); 703 - } 704 - folio_unlock(folio); 705 - goto end_wb; 706 - } 707 - 708 - xas_pause(&xas); 709 - rcu_read_unlock(); 710 - folio_lock(folio); 711 - if (!folio_test_dirty(folio)) { 712 - folio_detach_private(folio); 713 - gcount++; 714 - trace_netfs_folio(folio, netfs_folio_trace_clear_g); 715 - } else { 716 - trace_netfs_folio(folio, netfs_folio_trace_redirtied); 717 - } 718 - folio_unlock(folio); 719 - rcu_read_lock(); 720 - } else { 721 - trace_netfs_folio(folio, netfs_folio_trace_clear); 722 - } 723 - end_wb: 724 - if (folio_test_fscache(folio)) 725 - folio_end_fscache(folio); 726 - xas_advance(&xas, folio_next_index(folio) - 1); 727 - folio_end_writeback(folio); 728 - } 729 - 730 - rcu_read_unlock(); 731 - netfs_put_group_many(group, gcount); 732 - _leave(""); 733 - } 734 - 735 - /* 736 - * Deal with the disposition of the folios that are under writeback to close 737 - * out the operation. 
738 - */ 739 - static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq) 740 - { 741 - struct address_space *mapping = wreq->mapping; 742 - 743 - _enter(""); 744 - 745 - switch (wreq->error) { 746 - case 0: 747 - netfs_pages_written_back(wreq); 748 - break; 749 - 750 - default: 751 - pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error); 752 - fallthrough; 753 - case -EACCES: 754 - case -EPERM: 755 - case -ENOKEY: 756 - case -EKEYEXPIRED: 757 - case -EKEYREJECTED: 758 - case -EKEYREVOKED: 759 - case -ENETRESET: 760 - case -EDQUOT: 761 - case -ENOSPC: 762 - netfs_redirty_pages(mapping, wreq->start, wreq->len); 763 - break; 764 - 765 - case -EROFS: 766 - case -EIO: 767 - case -EREMOTEIO: 768 - case -EFBIG: 769 - case -ENOENT: 770 - case -ENOMEDIUM: 771 - case -ENXIO: 772 - netfs_kill_pages(mapping, wreq->start, wreq->len); 773 - break; 774 - } 775 - 776 - if (wreq->error) 777 - mapping_set_error(mapping, wreq->error); 778 - if (wreq->netfs_ops->done) 779 - wreq->netfs_ops->done(wreq); 780 - } 781 - 782 - /* 783 - * Extend the region to be written back to include subsequent contiguously 784 - * dirty pages if possible, but don't sleep while doing so. 785 - * 786 - * If this page holds new content, then we can include filler zeros in the 787 - * writeback. 
788 - */ 789 - static void netfs_extend_writeback(struct address_space *mapping, 790 - struct netfs_group *group, 791 - struct xa_state *xas, 792 - long *_count, 793 - loff_t start, 794 - loff_t max_len, 795 - bool caching, 796 - size_t *_len, 797 - size_t *_top) 798 - { 799 - struct netfs_folio *finfo; 800 - struct folio_batch fbatch; 801 - struct folio *folio; 802 - unsigned int i; 803 - pgoff_t index = (start + *_len) / PAGE_SIZE; 804 - size_t len; 805 - void *priv; 806 - bool stop = true; 807 - 808 - folio_batch_init(&fbatch); 809 - 810 - do { 811 - /* Firstly, we gather up a batch of contiguous dirty pages 812 - * under the RCU read lock - but we can't clear the dirty flags 813 - * there if any of those pages are mapped. 814 - */ 815 - rcu_read_lock(); 816 - 817 - xas_for_each(xas, folio, ULONG_MAX) { 818 - stop = true; 819 - if (xas_retry(xas, folio)) 820 - continue; 821 - if (xa_is_value(folio)) 822 - break; 823 - if (folio->index != index) { 824 - xas_reset(xas); 825 - break; 826 - } 827 - 828 - if (!folio_try_get_rcu(folio)) { 829 - xas_reset(xas); 830 - continue; 831 - } 832 - 833 - /* Has the folio moved or been split? 
*/ 834 - if (unlikely(folio != xas_reload(xas))) { 835 - folio_put(folio); 836 - xas_reset(xas); 837 - break; 838 - } 839 - 840 - if (!folio_trylock(folio)) { 841 - folio_put(folio); 842 - xas_reset(xas); 843 - break; 844 - } 845 - if (!folio_test_dirty(folio) || 846 - folio_test_writeback(folio) || 847 - folio_test_fscache(folio)) { 848 - folio_unlock(folio); 849 - folio_put(folio); 850 - xas_reset(xas); 851 - break; 852 - } 853 - 854 - stop = false; 855 - len = folio_size(folio); 856 - priv = folio_get_private(folio); 857 - if ((const struct netfs_group *)priv != group) { 858 - stop = true; 859 - finfo = netfs_folio_info(folio); 860 - if (finfo->netfs_group != group || 861 - finfo->dirty_offset > 0) { 862 - folio_unlock(folio); 863 - folio_put(folio); 864 - xas_reset(xas); 865 - break; 866 - } 867 - len = finfo->dirty_len; 868 - } 869 - 870 - *_top += folio_size(folio); 871 - index += folio_nr_pages(folio); 872 - *_count -= folio_nr_pages(folio); 873 - *_len += len; 874 - if (*_len >= max_len || *_count <= 0) 875 - stop = true; 876 - 877 - if (!folio_batch_add(&fbatch, folio)) 878 - break; 879 - if (stop) 880 - break; 881 - } 882 - 883 - xas_pause(xas); 884 - rcu_read_unlock(); 885 - 886 - /* Now, if we obtained any folios, we can shift them to being 887 - * writable and mark them for caching. 888 - */ 889 - if (!folio_batch_count(&fbatch)) 890 - break; 891 - 892 - for (i = 0; i < folio_batch_count(&fbatch); i++) { 893 - folio = fbatch.folios[i]; 894 - trace_netfs_folio(folio, netfs_folio_trace_store_plus); 895 - 896 - if (!folio_clear_dirty_for_io(folio)) 897 - BUG(); 898 - folio_start_writeback(folio); 899 - netfs_folio_start_fscache(caching, folio); 900 - folio_unlock(folio); 901 - } 902 - 903 - folio_batch_release(&fbatch); 904 - cond_resched(); 905 - } while (!stop); 906 - } 907 - 908 - /* 909 - * Synchronously write back the locked page and any subsequent non-locked dirty 910 - * pages. 
911 - */ 912 - static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping, 913 - struct writeback_control *wbc, 914 - struct netfs_group *group, 915 - struct xa_state *xas, 916 - struct folio *folio, 917 - unsigned long long start, 918 - unsigned long long end) 919 - { 920 - struct netfs_io_request *wreq; 921 - struct netfs_folio *finfo; 922 - struct netfs_inode *ctx = netfs_inode(mapping->host); 923 - unsigned long long i_size = i_size_read(&ctx->inode); 924 - size_t len, max_len; 925 - bool caching = netfs_is_cache_enabled(ctx); 926 - long count = wbc->nr_to_write; 927 - int ret; 928 - 929 - _enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching); 930 - 931 - wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio), 932 - NETFS_WRITEBACK); 933 - if (IS_ERR(wreq)) { 934 - folio_unlock(folio); 935 - return PTR_ERR(wreq); 936 - } 937 - 938 - if (!folio_clear_dirty_for_io(folio)) 939 - BUG(); 940 - folio_start_writeback(folio); 941 - netfs_folio_start_fscache(caching, folio); 942 - 943 - count -= folio_nr_pages(folio); 944 - 945 - /* Find all consecutive lockable dirty pages that have contiguous 946 - * written regions, stopping when we find a page that is not 947 - * immediately lockable, is not dirty or is missing, or we reach the 948 - * end of the range. 949 - */ 950 - trace_netfs_folio(folio, netfs_folio_trace_store); 951 - 952 - len = wreq->len; 953 - finfo = netfs_folio_info(folio); 954 - if (finfo) { 955 - start += finfo->dirty_offset; 956 - if (finfo->dirty_offset + finfo->dirty_len != len) { 957 - len = finfo->dirty_len; 958 - goto cant_expand; 959 - } 960 - len = finfo->dirty_len; 961 - } 962 - 963 - if (start < i_size) { 964 - /* Trim the write to the EOF; the extra data is ignored. Also 965 - * put an upper limit on the size of a single storedata op. 
966 - */ 967 - max_len = 65536 * 4096; 968 - max_len = min_t(unsigned long long, max_len, end - start + 1); 969 - max_len = min_t(unsigned long long, max_len, i_size - start); 970 - 971 - if (len < max_len) 972 - netfs_extend_writeback(mapping, group, xas, &count, start, 973 - max_len, caching, &len, &wreq->upper_len); 974 - } 975 - 976 - cant_expand: 977 - len = min_t(unsigned long long, len, i_size - start); 978 - 979 - /* We now have a contiguous set of dirty pages, each with writeback 980 - * set; the first page is still locked at this point, but all the rest 981 - * have been unlocked. 982 - */ 983 - folio_unlock(folio); 984 - wreq->start = start; 985 - wreq->len = len; 986 - 987 - if (start < i_size) { 988 - _debug("write back %zx @%llx [%llx]", len, start, i_size); 989 - 990 - /* Speculatively write to the cache. We have to fix this up 991 - * later if the store fails. 992 - */ 993 - wreq->cleanup = netfs_cleanup_buffered_write; 994 - 995 - iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start, 996 - wreq->upper_len); 997 - __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 998 - ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback); 999 - if (ret == 0 || ret == -EIOCBQUEUED) 1000 - wbc->nr_to_write -= len / PAGE_SIZE; 1001 - } else { 1002 - _debug("write discard %zx @%llx [%llx]", len, start, i_size); 1003 - 1004 - /* The dirty region was entirely beyond the EOF. 
*/ 1005 - fscache_clear_page_bits(mapping, start, len, caching); 1006 - netfs_pages_written_back(wreq); 1007 - ret = 0; 1008 - } 1009 - 1010 - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 1011 - _leave(" = 1"); 1012 - return 1; 1013 - } 1014 - 1015 - /* 1016 - * Write a region of pages back to the server 1017 - */ 1018 - static ssize_t netfs_writepages_begin(struct address_space *mapping, 1019 - struct writeback_control *wbc, 1020 - struct netfs_group *group, 1021 - struct xa_state *xas, 1022 - unsigned long long *_start, 1023 - unsigned long long end) 1024 - { 1025 - const struct netfs_folio *finfo; 1026 - struct folio *folio; 1027 - unsigned long long start = *_start; 1028 - ssize_t ret; 1029 - void *priv; 1030 - int skips = 0; 1031 - 1032 - _enter("%llx,%llx,", start, end); 1033 - 1034 - search_again: 1035 - /* Find the first dirty page in the group. */ 1036 - rcu_read_lock(); 1037 - 1038 - for (;;) { 1039 - folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 1040 - if (xas_retry(xas, folio) || xa_is_value(folio)) 1041 - continue; 1042 - if (!folio) 1043 - break; 1044 - 1045 - if (!folio_try_get_rcu(folio)) { 1046 - xas_reset(xas); 1047 - continue; 1048 - } 1049 - 1050 - if (unlikely(folio != xas_reload(xas))) { 1051 - folio_put(folio); 1052 - xas_reset(xas); 1053 - continue; 1054 - } 1055 - 1056 - /* Skip any dirty folio that's not in the group of interest. 
*/ 1057 - priv = folio_get_private(folio); 1058 - if ((const struct netfs_group *)priv != group) { 1059 - finfo = netfs_folio_info(folio); 1060 - if (finfo->netfs_group != group) { 1061 - folio_put(folio); 1062 - continue; 1063 - } 1064 - } 1065 - 1066 - xas_pause(xas); 1067 - break; 1068 - } 1069 - rcu_read_unlock(); 1070 - if (!folio) 1071 - return 0; 1072 - 1073 - start = folio_pos(folio); /* May regress with THPs */ 1074 - 1075 - _debug("wback %lx", folio->index); 1076 - 1077 - /* At this point we hold neither the i_pages lock nor the page lock: 1078 - * the page may be truncated or invalidated (changing page->mapping to 1079 - * NULL), or even swizzled back from swapper_space to tmpfs file 1080 - * mapping 1081 - */ 1082 - lock_again: 1083 - if (wbc->sync_mode != WB_SYNC_NONE) { 1084 - ret = folio_lock_killable(folio); 1085 - if (ret < 0) 1086 - return ret; 1087 - } else { 1088 - if (!folio_trylock(folio)) 1089 - goto search_again; 1090 - } 1091 - 1092 - if (folio->mapping != mapping || 1093 - !folio_test_dirty(folio)) { 1094 - start += folio_size(folio); 1095 - folio_unlock(folio); 1096 - goto search_again; 1097 - } 1098 - 1099 - if (folio_test_writeback(folio) || 1100 - folio_test_fscache(folio)) { 1101 - folio_unlock(folio); 1102 - if (wbc->sync_mode != WB_SYNC_NONE) { 1103 - folio_wait_writeback(folio); 1104 - #ifdef CONFIG_FSCACHE 1105 - folio_wait_fscache(folio); 1106 - #endif 1107 - goto lock_again; 1108 - } 1109 - 1110 - start += folio_size(folio); 1111 - if (wbc->sync_mode == WB_SYNC_NONE) { 1112 - if (skips >= 5 || need_resched()) { 1113 - ret = 0; 1114 - goto out; 1115 - } 1116 - skips++; 1117 - } 1118 - goto search_again; 1119 - } 1120 - 1121 - ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas, 1122 - folio, start, end); 1123 - out: 1124 - if (ret > 0) 1125 - *_start = start + ret; 1126 - _leave(" = %zd [%llx]", ret, *_start); 1127 - return ret; 1128 - } 1129 - 1130 - /* 1131 - * Write a region of pages back to the server 1132 - */ 
1133 - static int netfs_writepages_region(struct address_space *mapping, 1134 - struct writeback_control *wbc, 1135 - struct netfs_group *group, 1136 - unsigned long long *_start, 1137 - unsigned long long end) 1138 - { 1139 - ssize_t ret; 1140 - 1141 - XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 1142 - 1143 - do { 1144 - ret = netfs_writepages_begin(mapping, wbc, group, &xas, 1145 - _start, end); 1146 - if (ret > 0 && wbc->nr_to_write > 0) 1147 - cond_resched(); 1148 - } while (ret > 0 && wbc->nr_to_write > 0); 1149 - 1150 - return ret > 0 ? 0 : ret; 1151 - } 1152 - 1153 - /* 1154 - * write some of the pending data back to the server 1155 - */ 1156 - int netfs_writepages(struct address_space *mapping, 1157 - struct writeback_control *wbc) 1158 - { 1159 - struct netfs_group *group = NULL; 1160 - loff_t start, end; 1161 - int ret; 1162 - 1163 - _enter(""); 1164 - 1165 - /* We have to be careful as we can end up racing with setattr() 1166 - * truncating the pagecache since the caller doesn't take a lock here 1167 - * to prevent it. 
1168 - */ 1169 - 1170 - if (wbc->range_cyclic && mapping->writeback_index) { 1171 - start = mapping->writeback_index * PAGE_SIZE; 1172 - ret = netfs_writepages_region(mapping, wbc, group, 1173 - &start, LLONG_MAX); 1174 - if (ret < 0) 1175 - goto out; 1176 - 1177 - if (wbc->nr_to_write <= 0) { 1178 - mapping->writeback_index = start / PAGE_SIZE; 1179 - goto out; 1180 - } 1181 - 1182 - start = 0; 1183 - end = mapping->writeback_index * PAGE_SIZE; 1184 - mapping->writeback_index = 0; 1185 - ret = netfs_writepages_region(mapping, wbc, group, &start, end); 1186 - if (ret == 0) 1187 - mapping->writeback_index = start / PAGE_SIZE; 1188 - } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 1189 - start = 0; 1190 - ret = netfs_writepages_region(mapping, wbc, group, 1191 - &start, LLONG_MAX); 1192 - if (wbc->nr_to_write > 0 && ret == 0) 1193 - mapping->writeback_index = start / PAGE_SIZE; 1194 - } else { 1195 - start = wbc->range_start; 1196 - ret = netfs_writepages_region(mapping, wbc, group, 1197 - &start, wbc->range_end); 1198 - } 1199 - 1200 - out: 1201 - _leave(" = %d", ret); 1202 - return ret; 1203 - } 1204 - EXPORT_SYMBOL(netfs_writepages); 1205 - 1206 - /* 1207 - * Deal with the disposition of a laundered folio. 1208 - */ 1209 - static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq) 1210 - { 1211 - if (wreq->error) { 1212 - pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error); 1213 - mapping_set_error(wreq->mapping, wreq->error); 1214 - } 1215 - } 1216 - 1217 - /** 1218 - * netfs_launder_folio - Clean up a dirty folio that's being invalidated 1219 - * @folio: The folio to clean 1220 - * 1221 - * This is called to write back a folio that's being invalidated when an inode 1222 - * is getting torn down. Ideally, writepages would be used instead. 
1223 - */ 1224 - int netfs_launder_folio(struct folio *folio) 1225 - { 1226 - struct netfs_io_request *wreq; 1227 - struct address_space *mapping = folio->mapping; 1228 - struct netfs_folio *finfo = netfs_folio_info(folio); 1229 - struct netfs_group *group = netfs_folio_group(folio); 1230 - struct bio_vec bvec; 1231 - unsigned long long i_size = i_size_read(mapping->host); 1232 - unsigned long long start = folio_pos(folio); 1233 - size_t offset = 0, len; 1234 - int ret = 0; 1235 - 1236 - if (finfo) { 1237 - offset = finfo->dirty_offset; 1238 - start += offset; 1239 - len = finfo->dirty_len; 1240 - } else { 1241 - len = folio_size(folio); 1242 - } 1243 - len = min_t(unsigned long long, len, i_size - start); 1244 - 1245 - wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE); 1246 - if (IS_ERR(wreq)) { 1247 - ret = PTR_ERR(wreq); 1248 - goto out; 1249 - } 1250 - 1251 - if (!folio_clear_dirty_for_io(folio)) 1252 - goto out_put; 1253 - 1254 - trace_netfs_folio(folio, netfs_folio_trace_launder); 1255 - 1256 - _debug("launder %llx-%llx", start, start + len - 1); 1257 - 1258 - /* Speculatively write to the cache. We have to fix this up later if 1259 - * the store fails. 1260 - */ 1261 - wreq->cleanup = netfs_cleanup_launder_folio; 1262 - 1263 - bvec_set_folio(&bvec, folio, len, offset); 1264 - iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len); 1265 - __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 1266 - ret = netfs_begin_write(wreq, true, netfs_write_trace_launder); 1267 - 1268 - out_put: 1269 - folio_detach_private(folio); 1270 - netfs_put_group(group); 1271 - kfree(finfo); 1272 - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 1273 - out: 1274 - folio_wait_fscache(folio); 1275 - _leave(" = %d", ret); 1276 - return ret; 1277 - } 1278 - EXPORT_SYMBOL(netfs_launder_folio);
+39 -17
fs/netfs/direct_write.c
··· 34 34 unsigned long long start = iocb->ki_pos; 35 35 unsigned long long end = start + iov_iter_count(iter); 36 36 ssize_t ret, n; 37 + size_t len = iov_iter_count(iter); 37 38 bool async = !is_sync_kiocb(iocb); 38 39 39 40 _enter(""); ··· 47 46 48 47 _debug("uw %llx-%llx", start, end); 49 48 50 - wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, 51 - start, end - start, 52 - iocb->ki_flags & IOCB_DIRECT ? 53 - NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE); 49 + wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, 50 + iocb->ki_flags & IOCB_DIRECT ? 51 + NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE); 54 52 if (IS_ERR(wreq)) 55 53 return PTR_ERR(wreq); 54 + 55 + wreq->io_streams[0].avail = true; 56 + trace_netfs_write(wreq, (iocb->ki_flags & IOCB_DIRECT ? 57 + netfs_write_trace_dio_write : 58 + netfs_write_trace_unbuffered_write)); 56 59 57 60 { 58 61 /* If this is an async op and we're not using a bounce buffer, ··· 68 63 * request. 69 64 */ 70 65 if (async || user_backed_iter(iter)) { 71 - n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0); 66 + n = netfs_extract_user_iter(iter, len, &wreq->iter, 0); 72 67 if (n < 0) { 73 68 ret = n; 74 69 goto out; ··· 76 71 wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec; 77 72 wreq->direct_bv_count = n; 78 73 wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter); 79 - wreq->len = iov_iter_count(&wreq->iter); 80 74 } else { 81 75 wreq->iter = *iter; 82 76 } 83 77 84 78 wreq->io_iter = wreq->iter; 85 79 } 80 + 81 + __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); 86 82 87 83 /* Copy the data into the bounce buffer and encrypt it. */ 88 84 // TODO ··· 93 87 if (async) 94 88 wreq->iocb = iocb; 95 89 wreq->cleanup = netfs_cleanup_dio_write; 96 - ret = netfs_begin_write(wreq, is_sync_kiocb(iocb), 97 - iocb->ki_flags & IOCB_DIRECT ? 
98 - netfs_write_trace_dio_write : 99 - netfs_write_trace_unbuffered_write); 90 + ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter)); 100 91 if (ret < 0) { 101 92 _debug("begin = %zd", ret); 102 93 goto out; ··· 103 100 trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); 104 101 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 105 102 TASK_UNINTERRUPTIBLE); 106 - 103 + smp_rmb(); /* Read error/transferred after RIP flag */ 107 104 ret = wreq->error; 108 - _debug("waited = %zd", ret); 109 105 if (ret == 0) { 110 106 ret = wreq->transferred; 111 107 iocb->ki_pos += ret; ··· 134 132 ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) 135 133 { 136 134 struct file *file = iocb->ki_filp; 137 - struct inode *inode = file->f_mapping->host; 135 + struct address_space *mapping = file->f_mapping; 136 + struct inode *inode = mapping->host; 138 137 struct netfs_inode *ictx = netfs_inode(inode); 139 - unsigned long long end; 140 138 ssize_t ret; 139 + loff_t pos = iocb->ki_pos; 140 + unsigned long long end = pos + iov_iter_count(from) - 1; 141 141 142 - _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); 142 + _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); 143 143 144 144 if (!iov_iter_count(from)) 145 145 return 0; 146 146 147 147 trace_netfs_write_iter(iocb, from); 148 - netfs_stat(&netfs_n_rh_dio_write); 148 + netfs_stat(&netfs_n_wh_dio_write); 149 149 150 150 ret = netfs_start_io_direct(inode); 151 151 if (ret < 0) ··· 161 157 ret = file_update_time(file); 162 158 if (ret < 0) 163 159 goto out; 164 - ret = kiocb_invalidate_pages(iocb, iov_iter_count(from)); 160 + if (iocb->ki_flags & IOCB_NOWAIT) { 161 + /* We could block if there are any pages in the range. 
*/ 162 + ret = -EAGAIN; 163 + if (filemap_range_has_page(mapping, pos, end)) 164 + if (filemap_invalidate_inode(inode, true, pos, end)) 165 + goto out; 166 + } else { 167 + ret = filemap_write_and_wait_range(mapping, pos, end); 168 + if (ret < 0) 169 + goto out; 170 + } 171 + 172 + /* 173 + * After a write we want buffered reads to be sure to go to disk to get 174 + * the new data. We invalidate clean cached page from the region we're 175 + * about to write. We do this *before* the write so that we can return 176 + * without clobbering -EIOCBQUEUED from ->direct_IO(). 177 + */ 178 + ret = filemap_invalidate_inode(inode, true, pos, end); 165 179 if (ret < 0) 166 180 goto out; 167 181 end = iocb->ki_pos + iov_iter_count(from);
+9 -5
fs/netfs/fscache_io.c
··· 166 166 loff_t start; 167 167 size_t len; 168 168 bool set_bits; 169 + bool using_pgpriv2; 169 170 netfs_io_terminated_t term_func; 170 171 void *term_func_priv; 171 172 }; ··· 183 182 184 183 rcu_read_lock(); 185 184 xas_for_each(&xas, page, last) { 186 - end_page_fscache(page); 185 + folio_end_private_2(page_folio(page)); 187 186 } 188 187 rcu_read_unlock(); 189 188 } ··· 198 197 { 199 198 struct fscache_write_request *wreq = priv; 200 199 201 - fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, 202 - wreq->set_bits); 200 + if (wreq->using_pgpriv2) 201 + fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, 202 + wreq->set_bits); 203 203 204 204 if (wreq->term_func) 205 205 wreq->term_func(wreq->term_func_priv, transferred_or_error, ··· 214 212 loff_t start, size_t len, loff_t i_size, 215 213 netfs_io_terminated_t term_func, 216 214 void *term_func_priv, 217 - bool cond) 215 + bool using_pgpriv2, bool cond) 218 216 { 219 217 struct fscache_write_request *wreq; 220 218 struct netfs_cache_resources *cres; ··· 232 230 wreq->mapping = mapping; 233 231 wreq->start = start; 234 232 wreq->len = len; 233 + wreq->using_pgpriv2 = using_pgpriv2; 235 234 wreq->set_bits = cond; 236 235 wreq->term_func = term_func; 237 236 wreq->term_func_priv = term_func_priv; ··· 260 257 abandon_free: 261 258 kfree(wreq); 262 259 abandon: 263 - fscache_clear_page_bits(mapping, start, len, cond); 260 + if (using_pgpriv2) 261 + fscache_clear_page_bits(mapping, start, len, cond); 264 262 if (term_func) 265 263 term_func(term_func_priv, ret, false); 266 264 }
+41 -14
fs/netfs/internal.h
··· 37 37 extern unsigned int netfs_debug; 38 38 extern struct list_head netfs_io_requests; 39 39 extern spinlock_t netfs_proc_lock; 40 + extern mempool_t netfs_request_pool; 41 + extern mempool_t netfs_subrequest_pool; 40 42 41 43 #ifdef CONFIG_PROC_FS 42 44 static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) ··· 93 91 } 94 92 95 93 /* 96 - * output.c 97 - */ 98 - int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait, 99 - enum netfs_write_trace what); 100 - struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); 101 - int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end); 102 - int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb); 103 - 104 - /* 105 94 * stats.c 106 95 */ 107 96 #ifdef CONFIG_NETFS_STATS 108 97 extern atomic_t netfs_n_rh_dio_read; 109 - extern atomic_t netfs_n_rh_dio_write; 110 98 extern atomic_t netfs_n_rh_readahead; 111 - extern atomic_t netfs_n_rh_readpage; 99 + extern atomic_t netfs_n_rh_read_folio; 112 100 extern atomic_t netfs_n_rh_rreq; 113 101 extern atomic_t netfs_n_rh_sreq; 114 102 extern atomic_t netfs_n_rh_download; ··· 115 123 extern atomic_t netfs_n_rh_write_done; 116 124 extern atomic_t netfs_n_rh_write_failed; 117 125 extern atomic_t netfs_n_rh_write_zskip; 126 + extern atomic_t netfs_n_wh_buffered_write; 127 + extern atomic_t netfs_n_wh_writethrough; 128 + extern atomic_t netfs_n_wh_dio_write; 129 + extern atomic_t netfs_n_wh_writepages; 118 130 extern atomic_t netfs_n_wh_wstream_conflict; 119 131 extern atomic_t netfs_n_wh_upload; 120 132 extern atomic_t netfs_n_wh_upload_done; ··· 145 149 #endif 146 150 147 151 /* 152 + * write_collect.c 153 + */ 154 + int netfs_folio_written_back(struct folio *folio); 155 + void netfs_write_collection_worker(struct work_struct *work); 156 + void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async); 157 + 158 + /* 159 + * write_issue.c 160 + */ 161 + 
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, 162 + struct file *file, 163 + loff_t start, 164 + enum netfs_io_origin origin); 165 + void netfs_reissue_write(struct netfs_io_stream *stream, 166 + struct netfs_io_subrequest *subreq); 167 + int netfs_advance_write(struct netfs_io_request *wreq, 168 + struct netfs_io_stream *stream, 169 + loff_t start, size_t len, bool to_eof); 170 + struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); 171 + int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 172 + struct folio *folio, size_t copied, bool to_page_end, 173 + struct folio **writethrough_cache); 174 + int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 175 + struct folio *writethrough_cache); 176 + int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); 177 + 178 + /* 148 179 * Miscellaneous functions. 149 180 */ 150 181 static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx) ··· 191 168 */ 192 169 static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group) 193 170 { 194 - if (netfs_group) 171 + if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE) 195 172 refcount_inc(&netfs_group->ref); 196 173 return netfs_group; 197 174 } ··· 201 178 */ 202 179 static inline void netfs_put_group(struct netfs_group *netfs_group) 203 180 { 204 - if (netfs_group && refcount_dec_and_test(&netfs_group->ref)) 181 + if (netfs_group && 182 + netfs_group != NETFS_FOLIO_COPY_TO_CACHE && 183 + refcount_dec_and_test(&netfs_group->ref)) 205 184 netfs_group->free(netfs_group); 206 185 } 207 186 ··· 212 187 */ 213 188 static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) 214 189 { 215 - if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref)) 190 + if (netfs_group && 191 + netfs_group != NETFS_FOLIO_COPY_TO_CACHE && 192 + refcount_sub_and_test(nr, 
&netfs_group->ref)) 216 193 netfs_group->free(netfs_group); 217 194 } 218 195
+11 -151
fs/netfs/io.c
··· 99 99 } 100 100 101 101 /* 102 - * Deal with the completion of writing the data to the cache. We have to clear 103 - * the PG_fscache bits on the folios involved and release the caller's ref. 104 - * 105 - * May be called in softirq mode and we inherit a ref from the caller. 106 - */ 107 - static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, 108 - bool was_async) 109 - { 110 - struct netfs_io_subrequest *subreq; 111 - struct folio *folio; 112 - pgoff_t unlocked = 0; 113 - bool have_unlocked = false; 114 - 115 - rcu_read_lock(); 116 - 117 - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 118 - XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); 119 - 120 - xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { 121 - if (xas_retry(&xas, folio)) 122 - continue; 123 - 124 - /* We might have multiple writes from the same huge 125 - * folio, but we mustn't unlock a folio more than once. 126 - */ 127 - if (have_unlocked && folio->index <= unlocked) 128 - continue; 129 - unlocked = folio_next_index(folio) - 1; 130 - trace_netfs_folio(folio, netfs_folio_trace_end_copy); 131 - folio_end_fscache(folio); 132 - have_unlocked = true; 133 - } 134 - } 135 - 136 - rcu_read_unlock(); 137 - netfs_rreq_completed(rreq, was_async); 138 - } 139 - 140 - static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, 141 - bool was_async) 142 - { 143 - struct netfs_io_subrequest *subreq = priv; 144 - struct netfs_io_request *rreq = subreq->rreq; 145 - 146 - if (IS_ERR_VALUE(transferred_or_error)) { 147 - netfs_stat(&netfs_n_rh_write_failed); 148 - trace_netfs_failure(rreq, subreq, transferred_or_error, 149 - netfs_fail_copy_to_cache); 150 - } else { 151 - netfs_stat(&netfs_n_rh_write_done); 152 - } 153 - 154 - trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); 155 - 156 - /* If we decrement nr_copy_ops to 0, the ref belongs to us. 
*/ 157 - if (atomic_dec_and_test(&rreq->nr_copy_ops)) 158 - netfs_rreq_unmark_after_write(rreq, was_async); 159 - 160 - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 161 - } 162 - 163 - /* 164 - * Perform any outstanding writes to the cache. We inherit a ref from the 165 - * caller. 166 - */ 167 - static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) 168 - { 169 - struct netfs_cache_resources *cres = &rreq->cache_resources; 170 - struct netfs_io_subrequest *subreq, *next, *p; 171 - struct iov_iter iter; 172 - int ret; 173 - 174 - trace_netfs_rreq(rreq, netfs_rreq_trace_copy); 175 - 176 - /* We don't want terminating writes trying to wake us up whilst we're 177 - * still going through the list. 178 - */ 179 - atomic_inc(&rreq->nr_copy_ops); 180 - 181 - list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { 182 - if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { 183 - list_del_init(&subreq->rreq_link); 184 - netfs_put_subrequest(subreq, false, 185 - netfs_sreq_trace_put_no_copy); 186 - } 187 - } 188 - 189 - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 190 - /* Amalgamate adjacent writes */ 191 - while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 192 - next = list_next_entry(subreq, rreq_link); 193 - if (next->start != subreq->start + subreq->len) 194 - break; 195 - subreq->len += next->len; 196 - list_del_init(&next->rreq_link); 197 - netfs_put_subrequest(next, false, 198 - netfs_sreq_trace_put_merged); 199 - } 200 - 201 - ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, 202 - subreq->len, rreq->i_size, true); 203 - if (ret < 0) { 204 - trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); 205 - trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); 206 - continue; 207 - } 208 - 209 - iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, 210 - subreq->start, subreq->len); 211 - 212 - atomic_inc(&rreq->nr_copy_ops); 213 - 
netfs_stat(&netfs_n_rh_write); 214 - netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); 215 - trace_netfs_sreq(subreq, netfs_sreq_trace_write); 216 - cres->ops->write(cres, subreq->start, &iter, 217 - netfs_rreq_copy_terminated, subreq); 218 - } 219 - 220 - /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ 221 - if (atomic_dec_and_test(&rreq->nr_copy_ops)) 222 - netfs_rreq_unmark_after_write(rreq, false); 223 - } 224 - 225 - static void netfs_rreq_write_to_cache_work(struct work_struct *work) 226 - { 227 - struct netfs_io_request *rreq = 228 - container_of(work, struct netfs_io_request, work); 229 - 230 - netfs_rreq_do_write_to_cache(rreq); 231 - } 232 - 233 - static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) 234 - { 235 - rreq->work.func = netfs_rreq_write_to_cache_work; 236 - if (!queue_work(system_unbound_wq, &rreq->work)) 237 - BUG(); 238 - } 239 - 240 - /* 241 102 * Handle a short read. 242 103 */ 243 104 static void netfs_rreq_short_read(struct netfs_io_request *rreq, ··· 213 352 unsigned int i; 214 353 size_t transferred = 0; 215 354 216 - for (i = 0; i < rreq->direct_bv_count; i++) 355 + for (i = 0; i < rreq->direct_bv_count; i++) { 217 356 flush_dcache_page(rreq->direct_bv[i].bv_page); 357 + // TODO: cifs marks pages in the destination buffer 358 + // dirty under some circumstances after a read. Do we 359 + // need to do that too? 
360 + set_page_dirty(rreq->direct_bv[i].bv_page); 361 + } 218 362 219 363 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 220 364 if (subreq->error || subreq->transferred == 0) ··· 274 408 trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); 275 409 clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); 276 410 wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); 277 - 278 - if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) 279 - return netfs_rreq_write_to_cache(rreq); 280 411 281 412 netfs_rreq_completed(rreq, was_async); 282 413 } ··· 481 618 482 619 set: 483 620 if (subreq->len > rreq->len) 484 - pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n", 621 + pr_warn("R=%08x[%u] SREQ>RREQ %zx > %llx\n", 485 622 rreq->debug_id, subreq->debug_index, 486 623 subreq->len, rreq->len); 487 624 ··· 506 643 * Slice off a piece of a read request and submit an I/O request for it. 507 644 */ 508 645 static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, 509 - struct iov_iter *io_iter, 510 - unsigned int *_debug_index) 646 + struct iov_iter *io_iter) 511 647 { 512 648 struct netfs_io_subrequest *subreq; 513 649 enum netfs_io_source source; ··· 515 653 if (!subreq) 516 654 return false; 517 655 518 - subreq->debug_index = (*_debug_index)++; 519 656 subreq->start = rreq->start + rreq->submitted; 520 657 subreq->len = io_iter->count; 521 658 522 - _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted); 659 + _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); 523 660 list_add_tail(&subreq->rreq_link, &rreq->subrequests); 524 661 525 662 /* Call out to the cache to find out what it can do with the remaining ··· 568 707 int netfs_begin_read(struct netfs_io_request *rreq, bool sync) 569 708 { 570 709 struct iov_iter io_iter; 571 - unsigned int debug_index = 0; 572 710 int ret; 573 711 574 712 _enter("R=%x %llx-%llx", ··· 593 733 atomic_set(&rreq->nr_outstanding, 1); 594 734 io_iter = rreq->io_iter; 595 735 do { 596 - 
_debug("submit %llx + %zx >= %llx", 736 + _debug("submit %llx + %llx >= %llx", 597 737 rreq->start, rreq->submitted, rreq->i_size); 598 738 if (rreq->origin == NETFS_DIO_READ && 599 739 rreq->start + rreq->submitted >= rreq->i_size) 600 740 break; 601 - if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index)) 741 + if (!netfs_rreq_submit_slice(rreq, &io_iter)) 602 742 break; 603 743 if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && 604 744 test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
+47 -8
fs/netfs/main.c
··· 7 7 8 8 #include <linux/module.h> 9 9 #include <linux/export.h> 10 + #include <linux/mempool.h> 10 11 #include <linux/proc_fs.h> 11 12 #include <linux/seq_file.h> 12 13 #include "internal.h" ··· 24 23 module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); 25 24 MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); 26 25 26 + static struct kmem_cache *netfs_request_slab; 27 + static struct kmem_cache *netfs_subrequest_slab; 28 + mempool_t netfs_request_pool; 29 + mempool_t netfs_subrequest_pool; 30 + 27 31 #ifdef CONFIG_PROC_FS 28 32 LIST_HEAD(netfs_io_requests); 29 33 DEFINE_SPINLOCK(netfs_proc_lock); ··· 37 31 [NETFS_READAHEAD] = "RA", 38 32 [NETFS_READPAGE] = "RP", 39 33 [NETFS_READ_FOR_WRITE] = "RW", 34 + [NETFS_COPY_TO_CACHE] = "CC", 40 35 [NETFS_WRITEBACK] = "WB", 41 36 [NETFS_WRITETHROUGH] = "WT", 42 - [NETFS_LAUNDER_WRITE] = "LW", 43 37 [NETFS_UNBUFFERED_WRITE] = "UW", 44 38 [NETFS_DIO_READ] = "DR", 45 39 [NETFS_DIO_WRITE] = "DW", ··· 62 56 63 57 rreq = list_entry(v, struct netfs_io_request, proc_link); 64 58 seq_printf(m, 65 - "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx", 59 + "%08x %s %3d %2lx %4d %3d @%04llx %llx/%llx", 66 60 rreq->debug_id, 67 61 netfs_origins[rreq->origin], 68 62 refcount_read(&rreq->ref), ··· 104 98 { 105 99 int ret = -ENOMEM; 106 100 101 + netfs_request_slab = kmem_cache_create("netfs_request", 102 + sizeof(struct netfs_io_request), 0, 103 + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 104 + NULL); 105 + if (!netfs_request_slab) 106 + goto error_req; 107 + 108 + if (mempool_init_slab_pool(&netfs_request_pool, 100, netfs_request_slab) < 0) 109 + goto error_reqpool; 110 + 111 + netfs_subrequest_slab = kmem_cache_create("netfs_subrequest", 112 + sizeof(struct netfs_io_subrequest), 0, 113 + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 114 + NULL); 115 + if (!netfs_subrequest_slab) 116 + goto error_subreq; 117 + 118 + if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0) 119 + goto error_subreqpool; 120 + 107 
121 if (!proc_mkdir("fs/netfs", NULL)) 108 - goto error; 122 + goto error_proc; 109 123 if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL, 110 124 &netfs_requests_seq_ops)) 111 - goto error_proc; 125 + goto error_procfile; 112 126 #ifdef CONFIG_FSCACHE_STATS 113 127 if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, 114 128 netfs_stats_show)) 115 - goto error_proc; 129 + goto error_procfile; 116 130 #endif 117 131 118 132 ret = fscache_init(); 119 133 if (ret < 0) 120 - goto error_proc; 134 + goto error_fscache; 121 135 return 0; 122 136 123 - error_proc: 137 + error_fscache: 138 + error_procfile: 124 139 remove_proc_entry("fs/netfs", NULL); 125 - error: 140 + error_proc: 141 + mempool_exit(&netfs_subrequest_pool); 142 + error_subreqpool: 143 + kmem_cache_destroy(netfs_subrequest_slab); 144 + error_subreq: 145 + mempool_exit(&netfs_request_pool); 146 + error_reqpool: 147 + kmem_cache_destroy(netfs_request_slab); 148 + error_req: 126 149 return ret; 127 150 } 128 151 fs_initcall(netfs_init); ··· 160 125 { 161 126 fscache_exit(); 162 127 remove_proc_entry("fs/netfs", NULL); 128 + mempool_exit(&netfs_subrequest_pool); 129 + kmem_cache_destroy(netfs_subrequest_slab); 130 + mempool_exit(&netfs_request_pool); 131 + kmem_cache_destroy(netfs_request_slab); 163 132 } 164 133 module_exit(netfs_exit);
+1 -9
fs/netfs/misc.c
··· 177 177 */ 178 178 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) 179 179 { 180 - struct netfs_folio *finfo = NULL; 180 + struct netfs_folio *finfo; 181 181 size_t flen = folio_size(folio); 182 182 183 183 _enter("{%lx},%zx,%zx", folio->index, offset, length); 184 - 185 - folio_wait_fscache(folio); 186 184 187 185 if (!folio_test_private(folio)) 188 186 return; ··· 246 248 247 249 if (folio_test_private(folio)) 248 250 return false; 249 - if (folio_test_fscache(folio)) { 250 - if (current_is_kswapd() || !(gfp & __GFP_FS)) 251 - return false; 252 - folio_wait_fscache(folio); 253 - } 254 - 255 251 fscache_note_page_release(netfs_i_cookie(ctx)); 256 252 return true; 257 253 }
+60 -19
fs/netfs/objects.c
··· 6 6 */ 7 7 8 8 #include <linux/slab.h> 9 + #include <linux/mempool.h> 10 + #include <linux/delay.h> 9 11 #include "internal.h" 10 12 11 13 /* ··· 22 20 struct inode *inode = file ? file_inode(file) : mapping->host; 23 21 struct netfs_inode *ctx = netfs_inode(inode); 24 22 struct netfs_io_request *rreq; 23 + mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; 24 + struct kmem_cache *cache = mempool->pool_data; 25 25 bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || 26 26 origin == NETFS_DIO_READ || 27 27 origin == NETFS_DIO_WRITE); 28 28 bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); 29 29 int ret; 30 30 31 - rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request), 32 - GFP_KERNEL); 33 - if (!rreq) 34 - return ERR_PTR(-ENOMEM); 31 + for (;;) { 32 + rreq = mempool_alloc(mempool, GFP_KERNEL); 33 + if (rreq) 34 + break; 35 + msleep(10); 36 + } 35 37 38 + memset(rreq, 0, kmem_cache_size(cache)); 36 39 rreq->start = start; 37 40 rreq->len = len; 38 41 rreq->upper_len = len; ··· 47 40 rreq->inode = inode; 48 41 rreq->i_size = i_size_read(inode); 49 42 rreq->debug_id = atomic_inc_return(&debug_ids); 43 + rreq->wsize = INT_MAX; 44 + spin_lock_init(&rreq->lock); 45 + INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); 46 + INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); 50 47 INIT_LIST_HEAD(&rreq->subrequests); 51 48 INIT_WORK(&rreq->work, NULL); 52 49 refcount_set(&rreq->ref, 1); 53 50 54 51 __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); 55 - if (cached) 52 + if (cached) { 56 53 __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); 54 + if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) 55 + /* Filesystem uses deprecated PG_private_2 marking. 
*/ 56 + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); 57 + } 57 58 if (file && file->f_flags & O_NONBLOCK) 58 59 __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); 59 60 if (rreq->netfs_ops->init_request) { 60 61 ret = rreq->netfs_ops->init_request(rreq, file); 61 62 if (ret < 0) { 62 - kfree(rreq); 63 + mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool); 63 64 return ERR_PTR(ret); 64 65 } 65 66 } ··· 89 74 void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) 90 75 { 91 76 struct netfs_io_subrequest *subreq; 77 + struct netfs_io_stream *stream; 78 + int s; 92 79 93 80 while (!list_empty(&rreq->subrequests)) { 94 81 subreq = list_first_entry(&rreq->subrequests, ··· 99 82 netfs_put_subrequest(subreq, was_async, 100 83 netfs_sreq_trace_put_clear); 101 84 } 85 + 86 + for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) { 87 + stream = &rreq->io_streams[s]; 88 + while (!list_empty(&stream->subrequests)) { 89 + subreq = list_first_entry(&stream->subrequests, 90 + struct netfs_io_subrequest, rreq_link); 91 + list_del(&subreq->rreq_link); 92 + netfs_put_subrequest(subreq, was_async, 93 + netfs_sreq_trace_put_clear); 94 + } 95 + } 96 + } 97 + 98 + static void netfs_free_request_rcu(struct rcu_head *rcu) 99 + { 100 + struct netfs_io_request *rreq = container_of(rcu, struct netfs_io_request, rcu); 101 + 102 + mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool); 103 + netfs_stat_d(&netfs_n_rh_rreq); 102 104 } 103 105 104 106 static void netfs_free_request(struct work_struct *work) ··· 142 106 } 143 107 kvfree(rreq->direct_bv); 144 108 } 145 - kfree_rcu(rreq, rcu); 146 - netfs_stat_d(&netfs_n_rh_rreq); 109 + call_rcu(&rreq->rcu, netfs_free_request_rcu); 147 110 } 148 111 149 112 void netfs_put_request(struct netfs_io_request *rreq, bool was_async, ··· 174 139 struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq) 175 140 { 176 141 struct netfs_io_subrequest *subreq; 142 + mempool_t *mempool 
= rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool; 143 + struct kmem_cache *cache = mempool->pool_data; 177 144 178 - subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?: 179 - sizeof(struct netfs_io_subrequest), 180 - GFP_KERNEL); 181 - if (subreq) { 182 - INIT_WORK(&subreq->work, NULL); 183 - INIT_LIST_HEAD(&subreq->rreq_link); 184 - refcount_set(&subreq->ref, 2); 185 - subreq->rreq = rreq; 186 - netfs_get_request(rreq, netfs_rreq_trace_get_subreq); 187 - netfs_stat(&netfs_n_rh_sreq); 145 + for (;;) { 146 + subreq = mempool_alloc(rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool, 147 + GFP_KERNEL); 148 + if (subreq) 149 + break; 150 + msleep(10); 188 151 } 189 152 153 + memset(subreq, 0, kmem_cache_size(cache)); 154 + INIT_WORK(&subreq->work, NULL); 155 + INIT_LIST_HEAD(&subreq->rreq_link); 156 + refcount_set(&subreq->ref, 2); 157 + subreq->rreq = rreq; 158 + subreq->debug_index = atomic_inc_return(&rreq->subreq_counter); 159 + netfs_get_request(rreq, netfs_rreq_trace_get_subreq); 160 + netfs_stat(&netfs_n_rh_sreq); 190 161 return subreq; 191 162 } 192 163 ··· 214 173 trace_netfs_sreq(subreq, netfs_sreq_trace_free); 215 174 if (rreq->netfs_ops->free_subrequest) 216 175 rreq->netfs_ops->free_subrequest(subreq); 217 - kfree(subreq); 176 + mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool); 218 177 netfs_stat_d(&netfs_n_rh_sreq); 219 178 netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); 220 179 }
-478
fs/netfs/output.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Network filesystem high-level write support. 3 - * 4 - * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 - * Written by David Howells (dhowells@redhat.com) 6 - */ 7 - 8 - #include <linux/fs.h> 9 - #include <linux/mm.h> 10 - #include <linux/pagemap.h> 11 - #include <linux/slab.h> 12 - #include <linux/writeback.h> 13 - #include <linux/pagevec.h> 14 - #include "internal.h" 15 - 16 - /** 17 - * netfs_create_write_request - Create a write operation. 18 - * @wreq: The write request this is storing from. 19 - * @dest: The destination type 20 - * @start: Start of the region this write will modify 21 - * @len: Length of the modification 22 - * @worker: The worker function to handle the write(s) 23 - * 24 - * Allocate a write operation, set it up and add it to the list on a write 25 - * request. 26 - */ 27 - struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq, 28 - enum netfs_io_source dest, 29 - loff_t start, size_t len, 30 - work_func_t worker) 31 - { 32 - struct netfs_io_subrequest *subreq; 33 - 34 - subreq = netfs_alloc_subrequest(wreq); 35 - if (subreq) { 36 - INIT_WORK(&subreq->work, worker); 37 - subreq->source = dest; 38 - subreq->start = start; 39 - subreq->len = len; 40 - subreq->debug_index = wreq->subreq_counter++; 41 - 42 - switch (subreq->source) { 43 - case NETFS_UPLOAD_TO_SERVER: 44 - netfs_stat(&netfs_n_wh_upload); 45 - break; 46 - case NETFS_WRITE_TO_CACHE: 47 - netfs_stat(&netfs_n_wh_write); 48 - break; 49 - default: 50 - BUG(); 51 - } 52 - 53 - subreq->io_iter = wreq->io_iter; 54 - iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start); 55 - iov_iter_truncate(&subreq->io_iter, subreq->len); 56 - 57 - trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 58 - refcount_read(&subreq->ref), 59 - netfs_sreq_trace_new); 60 - atomic_inc(&wreq->nr_outstanding); 61 - list_add_tail(&subreq->rreq_link, &wreq->subrequests); 62 - trace_netfs_sreq(subreq, 
netfs_sreq_trace_prepare); 63 - } 64 - 65 - return subreq; 66 - } 67 - EXPORT_SYMBOL(netfs_create_write_request); 68 - 69 - /* 70 - * Process a completed write request once all the component operations have 71 - * been completed. 72 - */ 73 - static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async) 74 - { 75 - struct netfs_io_subrequest *subreq; 76 - struct netfs_inode *ctx = netfs_inode(wreq->inode); 77 - size_t transferred = 0; 78 - 79 - _enter("R=%x[]", wreq->debug_id); 80 - 81 - trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); 82 - 83 - list_for_each_entry(subreq, &wreq->subrequests, rreq_link) { 84 - if (subreq->error || subreq->transferred == 0) 85 - break; 86 - transferred += subreq->transferred; 87 - if (subreq->transferred < subreq->len) 88 - break; 89 - } 90 - wreq->transferred = transferred; 91 - 92 - list_for_each_entry(subreq, &wreq->subrequests, rreq_link) { 93 - if (!subreq->error) 94 - continue; 95 - switch (subreq->source) { 96 - case NETFS_UPLOAD_TO_SERVER: 97 - /* Depending on the type of failure, this may prevent 98 - * writeback completion unless we're in disconnected 99 - * mode. 100 - */ 101 - if (!wreq->error) 102 - wreq->error = subreq->error; 103 - break; 104 - 105 - case NETFS_WRITE_TO_CACHE: 106 - /* Failure doesn't prevent writeback completion unless 107 - * we're in disconnected mode. 
108 - */ 109 - if (subreq->error != -ENOBUFS) 110 - ctx->ops->invalidate_cache(wreq); 111 - break; 112 - 113 - default: 114 - WARN_ON_ONCE(1); 115 - if (!wreq->error) 116 - wreq->error = -EIO; 117 - return; 118 - } 119 - } 120 - 121 - wreq->cleanup(wreq); 122 - 123 - if (wreq->origin == NETFS_DIO_WRITE && 124 - wreq->mapping->nrpages) { 125 - pgoff_t first = wreq->start >> PAGE_SHIFT; 126 - pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; 127 - invalidate_inode_pages2_range(wreq->mapping, first, last); 128 - } 129 - 130 - if (wreq->origin == NETFS_DIO_WRITE) 131 - inode_dio_end(wreq->inode); 132 - 133 - _debug("finished"); 134 - trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); 135 - clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); 136 - wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); 137 - 138 - if (wreq->iocb) { 139 - wreq->iocb->ki_pos += transferred; 140 - if (wreq->iocb->ki_complete) 141 - wreq->iocb->ki_complete( 142 - wreq->iocb, wreq->error ? wreq->error : transferred); 143 - } 144 - 145 - netfs_clear_subrequests(wreq, was_async); 146 - netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete); 147 - } 148 - 149 - /* 150 - * Deal with the completion of writing the data to the cache. 
151 - */ 152 - void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 153 - bool was_async) 154 - { 155 - struct netfs_io_subrequest *subreq = _op; 156 - struct netfs_io_request *wreq = subreq->rreq; 157 - unsigned int u; 158 - 159 - _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 160 - 161 - switch (subreq->source) { 162 - case NETFS_UPLOAD_TO_SERVER: 163 - netfs_stat(&netfs_n_wh_upload_done); 164 - break; 165 - case NETFS_WRITE_TO_CACHE: 166 - netfs_stat(&netfs_n_wh_write_done); 167 - break; 168 - case NETFS_INVALID_WRITE: 169 - break; 170 - default: 171 - BUG(); 172 - } 173 - 174 - if (IS_ERR_VALUE(transferred_or_error)) { 175 - subreq->error = transferred_or_error; 176 - trace_netfs_failure(wreq, subreq, transferred_or_error, 177 - netfs_fail_write); 178 - goto failed; 179 - } 180 - 181 - if (WARN(transferred_or_error > subreq->len - subreq->transferred, 182 - "Subreq excess write: R%x[%x] %zd > %zu - %zu", 183 - wreq->debug_id, subreq->debug_index, 184 - transferred_or_error, subreq->len, subreq->transferred)) 185 - transferred_or_error = subreq->len - subreq->transferred; 186 - 187 - subreq->error = 0; 188 - subreq->transferred += transferred_or_error; 189 - 190 - if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred) 191 - pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n", 192 - wreq->debug_id, subreq->debug_index, 193 - iov_iter_count(&subreq->io_iter), subreq->len, 194 - subreq->transferred, subreq->io_iter.iter_type); 195 - 196 - if (subreq->transferred < subreq->len) 197 - goto incomplete; 198 - 199 - __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 200 - out: 201 - trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 202 - 203 - /* If we decrement nr_outstanding to 0, the ref belongs to us. 
*/ 204 - u = atomic_dec_return(&wreq->nr_outstanding); 205 - if (u == 0) 206 - netfs_write_terminated(wreq, was_async); 207 - else if (u == 1) 208 - wake_up_var(&wreq->nr_outstanding); 209 - 210 - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 211 - return; 212 - 213 - incomplete: 214 - if (transferred_or_error == 0) { 215 - if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { 216 - subreq->error = -ENODATA; 217 - goto failed; 218 - } 219 - } else { 220 - __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 221 - } 222 - 223 - __set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags); 224 - set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags); 225 - goto out; 226 - 227 - failed: 228 - switch (subreq->source) { 229 - case NETFS_WRITE_TO_CACHE: 230 - netfs_stat(&netfs_n_wh_write_failed); 231 - set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags); 232 - break; 233 - case NETFS_UPLOAD_TO_SERVER: 234 - netfs_stat(&netfs_n_wh_upload_failed); 235 - set_bit(NETFS_RREQ_FAILED, &wreq->flags); 236 - wreq->error = subreq->error; 237 - break; 238 - default: 239 - break; 240 - } 241 - goto out; 242 - } 243 - EXPORT_SYMBOL(netfs_write_subrequest_terminated); 244 - 245 - static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq) 246 - { 247 - struct netfs_io_request *wreq = subreq->rreq; 248 - struct netfs_cache_resources *cres = &wreq->cache_resources; 249 - 250 - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 251 - 252 - cres->ops->write(cres, subreq->start, &subreq->io_iter, 253 - netfs_write_subrequest_terminated, subreq); 254 - } 255 - 256 - static void netfs_write_to_cache_op_worker(struct work_struct *work) 257 - { 258 - struct netfs_io_subrequest *subreq = 259 - container_of(work, struct netfs_io_subrequest, work); 260 - 261 - netfs_write_to_cache_op(subreq); 262 - } 263 - 264 - /** 265 - * netfs_queue_write_request - Queue a write request for attention 266 - * @subreq: The write request to be queued 267 - * 268 - * Queue the specified 
write request for processing by a worker thread. We 269 - * pass the caller's ref on the request to the worker thread. 270 - */ 271 - void netfs_queue_write_request(struct netfs_io_subrequest *subreq) 272 - { 273 - if (!queue_work(system_unbound_wq, &subreq->work)) 274 - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip); 275 - } 276 - EXPORT_SYMBOL(netfs_queue_write_request); 277 - 278 - /* 279 - * Set up a op for writing to the cache. 280 - */ 281 - static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq) 282 - { 283 - struct netfs_cache_resources *cres = &wreq->cache_resources; 284 - struct netfs_io_subrequest *subreq; 285 - struct netfs_inode *ctx = netfs_inode(wreq->inode); 286 - struct fscache_cookie *cookie = netfs_i_cookie(ctx); 287 - loff_t start = wreq->start; 288 - size_t len = wreq->len; 289 - int ret; 290 - 291 - if (!fscache_cookie_enabled(cookie)) { 292 - clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags); 293 - return; 294 - } 295 - 296 - _debug("write to cache"); 297 - ret = fscache_begin_write_operation(cres, cookie); 298 - if (ret < 0) 299 - return; 300 - 301 - ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len, 302 - i_size_read(wreq->inode), true); 303 - if (ret < 0) 304 - return; 305 - 306 - subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len, 307 - netfs_write_to_cache_op_worker); 308 - if (!subreq) 309 - return; 310 - 311 - netfs_write_to_cache_op(subreq); 312 - } 313 - 314 - /* 315 - * Begin the process of writing out a chunk of data. 316 - * 317 - * We are given a write request that holds a series of dirty regions and 318 - * (partially) covers a sequence of folios, all of which are present. The 319 - * pages must have been marked as writeback as appropriate. 
320 - * 321 - * We need to perform the following steps: 322 - * 323 - * (1) If encrypting, create an output buffer and encrypt each block of the 324 - * data into it, otherwise the output buffer will point to the original 325 - * folios. 326 - * 327 - * (2) If the data is to be cached, set up a write op for the entire output 328 - * buffer to the cache, if the cache wants to accept it. 329 - * 330 - * (3) If the data is to be uploaded (ie. not merely cached): 331 - * 332 - * (a) If the data is to be compressed, create a compression buffer and 333 - * compress the data into it. 334 - * 335 - * (b) For each destination we want to upload to, set up write ops to write 336 - * to that destination. We may need multiple writes if the data is not 337 - * contiguous or the span exceeds wsize for a server. 338 - */ 339 - int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait, 340 - enum netfs_write_trace what) 341 - { 342 - struct netfs_inode *ctx = netfs_inode(wreq->inode); 343 - 344 - _enter("R=%x %llx-%llx f=%lx", 345 - wreq->debug_id, wreq->start, wreq->start + wreq->len - 1, 346 - wreq->flags); 347 - 348 - trace_netfs_write(wreq, what); 349 - if (wreq->len == 0 || wreq->iter.count == 0) { 350 - pr_err("Zero-sized write [R=%x]\n", wreq->debug_id); 351 - return -EIO; 352 - } 353 - 354 - if (wreq->origin == NETFS_DIO_WRITE) 355 - inode_dio_begin(wreq->inode); 356 - 357 - wreq->io_iter = wreq->iter; 358 - 359 - /* ->outstanding > 0 carries a ref */ 360 - netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding); 361 - atomic_set(&wreq->nr_outstanding, 1); 362 - 363 - /* Start the encryption/compression going. We can do that in the 364 - * background whilst we generate a list of write ops that we want to 365 - * perform. 
366 - */ 367 - // TODO: Encrypt or compress the region as appropriate 368 - 369 - /* We need to write all of the region to the cache */ 370 - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) 371 - netfs_set_up_write_to_cache(wreq); 372 - 373 - /* However, we don't necessarily write all of the region to the server. 374 - * Caching of reads is being managed this way also. 375 - */ 376 - if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) 377 - ctx->ops->create_write_requests(wreq, wreq->start, wreq->len); 378 - 379 - if (atomic_dec_and_test(&wreq->nr_outstanding)) 380 - netfs_write_terminated(wreq, false); 381 - 382 - if (!may_wait) 383 - return -EIOCBQUEUED; 384 - 385 - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 386 - TASK_UNINTERRUPTIBLE); 387 - return wreq->error; 388 - } 389 - 390 - /* 391 - * Begin a write operation for writing through the pagecache. 392 - */ 393 - struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len) 394 - { 395 - struct netfs_io_request *wreq; 396 - struct file *file = iocb->ki_filp; 397 - 398 - wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len, 399 - NETFS_WRITETHROUGH); 400 - if (IS_ERR(wreq)) 401 - return wreq; 402 - 403 - trace_netfs_write(wreq, netfs_write_trace_writethrough); 404 - 405 - __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 406 - iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0); 407 - wreq->io_iter = wreq->iter; 408 - 409 - /* ->outstanding > 0 carries a ref */ 410 - netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding); 411 - atomic_set(&wreq->nr_outstanding, 1); 412 - return wreq; 413 - } 414 - 415 - static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final) 416 - { 417 - struct netfs_inode *ictx = netfs_inode(wreq->inode); 418 - unsigned long long start; 419 - size_t len; 420 - 421 - if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) 422 - return; 423 - 424 - start = wreq->start + 
wreq->submitted; 425 - len = wreq->iter.count - wreq->submitted; 426 - if (!final) { 427 - len /= wreq->wsize; /* Round to number of maximum packets */ 428 - len *= wreq->wsize; 429 - } 430 - 431 - ictx->ops->create_write_requests(wreq, start, len); 432 - wreq->submitted += len; 433 - } 434 - 435 - /* 436 - * Advance the state of the write operation used when writing through the 437 - * pagecache. Data has been copied into the pagecache that we need to append 438 - * to the request. If we've added more than wsize then we need to create a new 439 - * subrequest. 440 - */ 441 - int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end) 442 - { 443 - _enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u", 444 - wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end); 445 - 446 - wreq->iter.count += copied; 447 - wreq->io_iter.count += copied; 448 - if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize) 449 - netfs_submit_writethrough(wreq, false); 450 - 451 - return wreq->error; 452 - } 453 - 454 - /* 455 - * End a write operation used when writing through the pagecache. 456 - */ 457 - int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb) 458 - { 459 - int ret = -EIOCBQUEUED; 460 - 461 - _enter("ic=%zu sb=%zu ws=%u", 462 - wreq->iter.count, wreq->submitted, wreq->wsize); 463 - 464 - if (wreq->submitted < wreq->io_iter.count) 465 - netfs_submit_writethrough(wreq, true); 466 - 467 - if (atomic_dec_and_test(&wreq->nr_outstanding)) 468 - netfs_write_terminated(wreq, false); 469 - 470 - if (is_sync_kiocb(iocb)) { 471 - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 472 - TASK_UNINTERRUPTIBLE); 473 - ret = wreq->error; 474 - } 475 - 476 - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 477 - return ret; 478 - }
+12 -5
fs/netfs/stats.c
··· 10 10 #include "internal.h" 11 11 12 12 atomic_t netfs_n_rh_dio_read; 13 - atomic_t netfs_n_rh_dio_write; 14 13 atomic_t netfs_n_rh_readahead; 15 - atomic_t netfs_n_rh_readpage; 14 + atomic_t netfs_n_rh_read_folio; 16 15 atomic_t netfs_n_rh_rreq; 17 16 atomic_t netfs_n_rh_sreq; 18 17 atomic_t netfs_n_rh_download; ··· 28 29 atomic_t netfs_n_rh_write_done; 29 30 atomic_t netfs_n_rh_write_failed; 30 31 atomic_t netfs_n_rh_write_zskip; 32 + atomic_t netfs_n_wh_buffered_write; 33 + atomic_t netfs_n_wh_writethrough; 34 + atomic_t netfs_n_wh_dio_write; 35 + atomic_t netfs_n_wh_writepages; 31 36 atomic_t netfs_n_wh_wstream_conflict; 32 37 atomic_t netfs_n_wh_upload; 33 38 atomic_t netfs_n_wh_upload_done; ··· 42 39 43 40 int netfs_stats_show(struct seq_file *m, void *v) 44 41 { 45 - seq_printf(m, "Netfs : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n", 42 + seq_printf(m, "Netfs : DR=%u RA=%u RF=%u WB=%u WBZ=%u\n", 46 43 atomic_read(&netfs_n_rh_dio_read), 47 - atomic_read(&netfs_n_rh_dio_write), 48 44 atomic_read(&netfs_n_rh_readahead), 49 - atomic_read(&netfs_n_rh_readpage), 45 + atomic_read(&netfs_n_rh_read_folio), 50 46 atomic_read(&netfs_n_rh_write_begin), 51 47 atomic_read(&netfs_n_rh_write_zskip)); 48 + seq_printf(m, "Netfs : BW=%u WT=%u DW=%u WP=%u\n", 49 + atomic_read(&netfs_n_wh_buffered_write), 50 + atomic_read(&netfs_n_wh_writethrough), 51 + atomic_read(&netfs_n_wh_dio_write), 52 + atomic_read(&netfs_n_wh_writepages)); 52 53 seq_printf(m, "Netfs : ZR=%u sh=%u sk=%u\n", 53 54 atomic_read(&netfs_n_rh_zero), 54 55 atomic_read(&netfs_n_rh_short_read),
+808
fs/netfs/write_collect.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Network filesystem write subrequest result collection, assessment 3 + * and retrying. 4 + * 5 + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 6 + * Written by David Howells (dhowells@redhat.com) 7 + */ 8 + 9 + #include <linux/export.h> 10 + #include <linux/fs.h> 11 + #include <linux/mm.h> 12 + #include <linux/pagemap.h> 13 + #include <linux/slab.h> 14 + #include "internal.h" 15 + 16 + /* Notes made in the collector */ 17 + #define HIT_PENDING 0x01 /* A front op was still pending */ 18 + #define SOME_EMPTY 0x02 /* One of more streams are empty */ 19 + #define ALL_EMPTY 0x04 /* All streams are empty */ 20 + #define MAYBE_DISCONTIG 0x08 /* A front op may be discontiguous (rounded to PAGE_SIZE) */ 21 + #define NEED_REASSESS 0x10 /* Need to loop round and reassess */ 22 + #define REASSESS_DISCONTIG 0x20 /* Reassess discontiguity if contiguity advances */ 23 + #define MADE_PROGRESS 0x40 /* Made progress cleaning up a stream or the folio set */ 24 + #define BUFFERED 0x80 /* The pagecache needs cleaning up */ 25 + #define NEED_RETRY 0x100 /* A front op requests retrying */ 26 + #define SAW_FAILURE 0x200 /* One stream or hit a permanent failure */ 27 + 28 + /* 29 + * Successful completion of write of a folio to the server and/or cache. Note 30 + * that we are not allowed to lock the folio here on pain of deadlocking with 31 + * truncate. 32 + */ 33 + int netfs_folio_written_back(struct folio *folio) 34 + { 35 + enum netfs_folio_trace why = netfs_folio_trace_clear; 36 + struct netfs_folio *finfo; 37 + struct netfs_group *group = NULL; 38 + int gcount = 0; 39 + 40 + if ((finfo = netfs_folio_info(folio))) { 41 + /* Streaming writes cannot be redirtied whilst under writeback, 42 + * so discard the streaming record. 
43 + */ 44 + folio_detach_private(folio); 45 + group = finfo->netfs_group; 46 + gcount++; 47 + kfree(finfo); 48 + why = netfs_folio_trace_clear_s; 49 + goto end_wb; 50 + } 51 + 52 + if ((group = netfs_folio_group(folio))) { 53 + if (group == NETFS_FOLIO_COPY_TO_CACHE) { 54 + why = netfs_folio_trace_clear_cc; 55 + folio_detach_private(folio); 56 + goto end_wb; 57 + } 58 + 59 + /* Need to detach the group pointer if the page didn't get 60 + * redirtied. If it has been redirtied, then it must be within 61 + * the same group. 62 + */ 63 + why = netfs_folio_trace_redirtied; 64 + if (!folio_test_dirty(folio)) { 65 + folio_detach_private(folio); 66 + gcount++; 67 + why = netfs_folio_trace_clear_g; 68 + } 69 + } 70 + 71 + end_wb: 72 + trace_netfs_folio(folio, why); 73 + folio_end_writeback(folio); 74 + return gcount; 75 + } 76 + 77 + /* 78 + * Get hold of a folio we have under writeback. We don't want to get the 79 + * refcount on it. 80 + */ 81 + static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos) 82 + { 83 + XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE); 84 + struct folio *folio; 85 + 86 + rcu_read_lock(); 87 + 88 + for (;;) { 89 + xas_reset(&xas); 90 + folio = xas_load(&xas); 91 + if (xas_retry(&xas, folio)) 92 + continue; 93 + 94 + if (!folio || xa_is_value(folio)) 95 + kdebug("R=%08x: folio %lx (%llx) not present", 96 + wreq->debug_id, xas.xa_index, pos / PAGE_SIZE); 97 + BUG_ON(!folio || xa_is_value(folio)); 98 + 99 + if (folio == xas_reload(&xas)) 100 + break; 101 + } 102 + 103 + rcu_read_unlock(); 104 + 105 + if (WARN_ONCE(!folio_test_writeback(folio), 106 + "R=%08x: folio %lx is not under writeback\n", 107 + wreq->debug_id, folio->index)) { 108 + trace_netfs_folio(folio, netfs_folio_trace_not_under_wback); 109 + } 110 + return folio; 111 + } 112 + 113 + /* 114 + * Unlock any folios we've finished with. 
115 + */ 116 + static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq, 117 + unsigned long long collected_to, 118 + unsigned int *notes) 119 + { 120 + for (;;) { 121 + struct folio *folio; 122 + struct netfs_folio *finfo; 123 + unsigned long long fpos, fend; 124 + size_t fsize, flen; 125 + 126 + folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to); 127 + 128 + fpos = folio_pos(folio); 129 + fsize = folio_size(folio); 130 + finfo = netfs_folio_info(folio); 131 + flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize; 132 + 133 + fend = min_t(unsigned long long, fpos + flen, wreq->i_size); 134 + 135 + trace_netfs_collect_folio(wreq, folio, fend, collected_to); 136 + 137 + if (fpos + fsize > wreq->contiguity) { 138 + trace_netfs_collect_contig(wreq, fpos + fsize, 139 + netfs_contig_trace_unlock); 140 + wreq->contiguity = fpos + fsize; 141 + } 142 + 143 + /* Unlock any folio we've transferred all of. */ 144 + if (collected_to < fend) 145 + break; 146 + 147 + wreq->nr_group_rel += netfs_folio_written_back(folio); 148 + wreq->cleaned_to = fpos + fsize; 149 + *notes |= MADE_PROGRESS; 150 + 151 + if (fpos + fsize >= collected_to) 152 + break; 153 + } 154 + } 155 + 156 + /* 157 + * Perform retries on the streams that need it. 158 + */ 159 + static void netfs_retry_write_stream(struct netfs_io_request *wreq, 160 + struct netfs_io_stream *stream) 161 + { 162 + struct list_head *next; 163 + 164 + _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); 165 + 166 + if (list_empty(&stream->subrequests)) 167 + return; 168 + 169 + if (stream->source == NETFS_UPLOAD_TO_SERVER && 170 + wreq->netfs_ops->retry_request) 171 + wreq->netfs_ops->retry_request(wreq, stream); 172 + 173 + if (unlikely(stream->failed)) 174 + return; 175 + 176 + /* If there's no renegotiation to do, just resend each failed subreq. 
*/ 177 + if (!stream->prepare_write) { 178 + struct netfs_io_subrequest *subreq; 179 + 180 + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { 181 + if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 182 + break; 183 + if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { 184 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 185 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 186 + netfs_reissue_write(stream, subreq); 187 + } 188 + } 189 + return; 190 + } 191 + 192 + next = stream->subrequests.next; 193 + 194 + do { 195 + struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp; 196 + unsigned long long start, len; 197 + size_t part; 198 + bool boundary = false; 199 + 200 + /* Go through the stream and find the next span of contiguous 201 + * data that we then rejig (cifs, for example, needs the wsize 202 + * renegotiating) and reissue. 203 + */ 204 + from = list_entry(next, struct netfs_io_subrequest, rreq_link); 205 + to = from; 206 + start = from->start + from->transferred; 207 + len = from->len - from->transferred; 208 + 209 + if (test_bit(NETFS_SREQ_FAILED, &from->flags) || 210 + !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) 211 + return; 212 + 213 + list_for_each_continue(next, &stream->subrequests) { 214 + subreq = list_entry(next, struct netfs_io_subrequest, rreq_link); 215 + if (subreq->start + subreq->transferred != start + len || 216 + test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) || 217 + !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) 218 + break; 219 + to = subreq; 220 + len += to->len; 221 + } 222 + 223 + /* Work through the sublist. 
*/ 224 + subreq = from; 225 + list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { 226 + if (!len) 227 + break; 228 + /* Renegotiate max_len (wsize) */ 229 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 230 + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 231 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 232 + stream->prepare_write(subreq); 233 + 234 + part = min(len, subreq->max_len); 235 + subreq->len = part; 236 + subreq->start = start; 237 + subreq->transferred = 0; 238 + len -= part; 239 + start += part; 240 + if (len && subreq == to && 241 + __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags)) 242 + boundary = true; 243 + 244 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 245 + netfs_reissue_write(stream, subreq); 246 + if (subreq == to) 247 + break; 248 + } 249 + 250 + /* If we managed to use fewer subreqs, we can discard the 251 + * excess; if we used the same number, then we're done. 252 + */ 253 + if (!len) { 254 + if (subreq == to) 255 + continue; 256 + list_for_each_entry_safe_from(subreq, tmp, 257 + &stream->subrequests, rreq_link) { 258 + trace_netfs_sreq(subreq, netfs_sreq_trace_discard); 259 + list_del(&subreq->rreq_link); 260 + netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); 261 + if (subreq == to) 262 + break; 263 + } 264 + continue; 265 + } 266 + 267 + /* We ran out of subrequests, so we need to allocate some more 268 + * and insert them after. 
269 + */ 270 + do { 271 + subreq = netfs_alloc_subrequest(wreq); 272 + subreq->source = to->source; 273 + subreq->start = start; 274 + subreq->max_len = len; 275 + subreq->max_nr_segs = INT_MAX; 276 + subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); 277 + subreq->stream_nr = to->stream_nr; 278 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 279 + 280 + trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 281 + refcount_read(&subreq->ref), 282 + netfs_sreq_trace_new); 283 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 284 + 285 + list_add(&subreq->rreq_link, &to->rreq_link); 286 + to = list_next_entry(to, rreq_link); 287 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 288 + 289 + switch (stream->source) { 290 + case NETFS_UPLOAD_TO_SERVER: 291 + netfs_stat(&netfs_n_wh_upload); 292 + subreq->max_len = min(len, wreq->wsize); 293 + break; 294 + case NETFS_WRITE_TO_CACHE: 295 + netfs_stat(&netfs_n_wh_write); 296 + break; 297 + default: 298 + WARN_ON_ONCE(1); 299 + } 300 + 301 + stream->prepare_write(subreq); 302 + 303 + part = min(len, subreq->max_len); 304 + subreq->len = subreq->transferred + part; 305 + len -= part; 306 + start += part; 307 + if (!len && boundary) { 308 + __set_bit(NETFS_SREQ_BOUNDARY, &to->flags); 309 + boundary = false; 310 + } 311 + 312 + netfs_reissue_write(stream, subreq); 313 + if (!len) 314 + break; 315 + 316 + } while (len); 317 + 318 + } while (!list_is_head(next, &stream->subrequests)); 319 + } 320 + 321 + /* 322 + * Perform retries on the streams that need it. If we're doing content 323 + * encryption and the server copy changed due to a third-party write, we may 324 + * need to do an RMW cycle and also rewrite the data to the cache. 
325 + */ 326 + static void netfs_retry_writes(struct netfs_io_request *wreq) 327 + { 328 + struct netfs_io_subrequest *subreq; 329 + struct netfs_io_stream *stream; 330 + int s; 331 + 332 + /* Wait for all outstanding I/O to quiesce before performing retries as 333 + * we may need to renegotiate the I/O sizes. 334 + */ 335 + for (s = 0; s < NR_IO_STREAMS; s++) { 336 + stream = &wreq->io_streams[s]; 337 + if (!stream->active) 338 + continue; 339 + 340 + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { 341 + wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, 342 + TASK_UNINTERRUPTIBLE); 343 + } 344 + } 345 + 346 + // TODO: Enc: Fetch changed partial pages 347 + // TODO: Enc: Reencrypt content if needed. 348 + // TODO: Enc: Wind back transferred point. 349 + // TODO: Enc: Mark cache pages for retry. 350 + 351 + for (s = 0; s < NR_IO_STREAMS; s++) { 352 + stream = &wreq->io_streams[s]; 353 + if (stream->need_retry) { 354 + stream->need_retry = false; 355 + netfs_retry_write_stream(wreq, stream); 356 + } 357 + } 358 + } 359 + 360 + /* 361 + * Collect and assess the results of various write subrequests. We may need to 362 + * retry some of the results - or even do an RMW cycle for content crypto. 363 + * 364 + * Note that we have a number of parallel, overlapping lists of subrequests, 365 + * one to the server and one to the local cache for example, which may not be 366 + * the same size or starting position and may not even correspond in boundary 367 + * alignment. 
368 + */ 369 + static void netfs_collect_write_results(struct netfs_io_request *wreq) 370 + { 371 + struct netfs_io_subrequest *front, *remove; 372 + struct netfs_io_stream *stream; 373 + unsigned long long collected_to; 374 + unsigned int notes; 375 + int s; 376 + 377 + _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 378 + trace_netfs_collect(wreq); 379 + trace_netfs_rreq(wreq, netfs_rreq_trace_collect); 380 + 381 + reassess_streams: 382 + smp_rmb(); 383 + collected_to = ULLONG_MAX; 384 + if (wreq->origin == NETFS_WRITEBACK) 385 + notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG; 386 + else if (wreq->origin == NETFS_WRITETHROUGH) 387 + notes = ALL_EMPTY | BUFFERED; 388 + else 389 + notes = ALL_EMPTY; 390 + 391 + /* Remove completed subrequests from the front of the streams and 392 + * advance the completion point on each stream. We stop when we hit 393 + * something that's in progress. The issuer thread may be adding stuff 394 + * to the tail whilst we're doing this. 395 + * 396 + * We must not, however, merge in discontiguities that span whole 397 + * folios that aren't under writeback. This is made more complicated 398 + * by the folios in the gap being of unpredictable sizes - if they even 399 + * exist - but we don't want to look them up. 400 + */ 401 + for (s = 0; s < NR_IO_STREAMS; s++) { 402 + loff_t rstart, rend; 403 + 404 + stream = &wreq->io_streams[s]; 405 + /* Read active flag before list pointers */ 406 + if (!smp_load_acquire(&stream->active)) 407 + continue; 408 + 409 + front = stream->front; 410 + while (front) { 411 + trace_netfs_collect_sreq(wreq, front); 412 + //_debug("sreq [%x] %llx %zx/%zx", 413 + // front->debug_index, front->start, front->transferred, front->len); 414 + 415 + /* Stall if there may be a discontinuity. 
*/ 416 + rstart = round_down(front->start, PAGE_SIZE); 417 + if (rstart > wreq->contiguity) { 418 + if (wreq->contiguity > stream->collected_to) { 419 + trace_netfs_collect_gap(wreq, stream, 420 + wreq->contiguity, 'D'); 421 + stream->collected_to = wreq->contiguity; 422 + } 423 + notes |= REASSESS_DISCONTIG; 424 + break; 425 + } 426 + rend = round_up(front->start + front->len, PAGE_SIZE); 427 + if (rend > wreq->contiguity) { 428 + trace_netfs_collect_contig(wreq, rend, 429 + netfs_contig_trace_collect); 430 + wreq->contiguity = rend; 431 + if (notes & REASSESS_DISCONTIG) 432 + notes |= NEED_REASSESS; 433 + } 434 + notes &= ~MAYBE_DISCONTIG; 435 + 436 + /* Stall if the front is still undergoing I/O. */ 437 + if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { 438 + notes |= HIT_PENDING; 439 + break; 440 + } 441 + smp_rmb(); /* Read counters after I-P flag. */ 442 + 443 + if (stream->failed) { 444 + stream->collected_to = front->start + front->len; 445 + notes |= MADE_PROGRESS | SAW_FAILURE; 446 + goto cancel; 447 + } 448 + if (front->start + front->transferred > stream->collected_to) { 449 + stream->collected_to = front->start + front->transferred; 450 + stream->transferred = stream->collected_to - wreq->start; 451 + notes |= MADE_PROGRESS; 452 + } 453 + if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { 454 + stream->failed = true; 455 + stream->error = front->error; 456 + if (stream->source == NETFS_UPLOAD_TO_SERVER) 457 + mapping_set_error(wreq->mapping, front->error); 458 + notes |= NEED_REASSESS | SAW_FAILURE; 459 + break; 460 + } 461 + if (front->transferred < front->len) { 462 + stream->need_retry = true; 463 + notes |= NEED_RETRY | MADE_PROGRESS; 464 + break; 465 + } 466 + 467 + cancel: 468 + /* Remove if completely consumed. 
*/ 469 + spin_lock(&wreq->lock); 470 + 471 + remove = front; 472 + list_del_init(&front->rreq_link); 473 + front = list_first_entry_or_null(&stream->subrequests, 474 + struct netfs_io_subrequest, rreq_link); 475 + stream->front = front; 476 + if (!front) { 477 + unsigned long long jump_to = atomic64_read(&wreq->issued_to); 478 + 479 + if (stream->collected_to < jump_to) { 480 + trace_netfs_collect_gap(wreq, stream, jump_to, 'A'); 481 + stream->collected_to = jump_to; 482 + } 483 + } 484 + 485 + spin_unlock(&wreq->lock); 486 + netfs_put_subrequest(remove, false, 487 + notes & SAW_FAILURE ? 488 + netfs_sreq_trace_put_cancel : 489 + netfs_sreq_trace_put_done); 490 + } 491 + 492 + if (front) 493 + notes &= ~ALL_EMPTY; 494 + else 495 + notes |= SOME_EMPTY; 496 + 497 + if (stream->collected_to < collected_to) 498 + collected_to = stream->collected_to; 499 + } 500 + 501 + if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to) 502 + wreq->collected_to = collected_to; 503 + 504 + /* If we have an empty stream, we need to jump it forward over any gap 505 + * otherwise the collection point will never advance. 506 + * 507 + * Note that the issuer always adds to the stream with the lowest 508 + * so-far submitted start, so if we see two consecutive subreqs in one 509 + * stream with nothing between then in another stream, then the second 510 + * stream has a gap that can be jumped. 
511 + */ 512 + if (notes & SOME_EMPTY) { 513 + unsigned long long jump_to = wreq->start + wreq->len; 514 + 515 + for (s = 0; s < NR_IO_STREAMS; s++) { 516 + stream = &wreq->io_streams[s]; 517 + if (stream->active && 518 + stream->front && 519 + stream->front->start < jump_to) 520 + jump_to = stream->front->start; 521 + } 522 + 523 + for (s = 0; s < NR_IO_STREAMS; s++) { 524 + stream = &wreq->io_streams[s]; 525 + if (stream->active && 526 + !stream->front && 527 + stream->collected_to < jump_to) { 528 + trace_netfs_collect_gap(wreq, stream, jump_to, 'B'); 529 + stream->collected_to = jump_to; 530 + } 531 + } 532 + } 533 + 534 + for (s = 0; s < NR_IO_STREAMS; s++) { 535 + stream = &wreq->io_streams[s]; 536 + if (stream->active) 537 + trace_netfs_collect_stream(wreq, stream); 538 + } 539 + 540 + trace_netfs_collect_state(wreq, wreq->collected_to, notes); 541 + 542 + /* Unlock any folios that we have now finished with. */ 543 + if (notes & BUFFERED) { 544 + unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity); 545 + 546 + if (wreq->cleaned_to < clean_to) 547 + netfs_writeback_unlock_folios(wreq, clean_to, &notes); 548 + } else { 549 + wreq->cleaned_to = wreq->collected_to; 550 + } 551 + 552 + // TODO: Discard encryption buffers 553 + 554 + /* If all streams are discontiguous with the last folio we cleared, we 555 + * may need to skip a set of folios. 
556 + */ 557 + if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) { 558 + unsigned long long jump_to = ULLONG_MAX; 559 + 560 + for (s = 0; s < NR_IO_STREAMS; s++) { 561 + stream = &wreq->io_streams[s]; 562 + if (stream->active && stream->front && 563 + stream->front->start < jump_to) 564 + jump_to = stream->front->start; 565 + } 566 + 567 + trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump); 568 + wreq->contiguity = jump_to; 569 + wreq->cleaned_to = jump_to; 570 + wreq->collected_to = jump_to; 571 + for (s = 0; s < NR_IO_STREAMS; s++) { 572 + stream = &wreq->io_streams[s]; 573 + if (stream->collected_to < jump_to) 574 + stream->collected_to = jump_to; 575 + } 576 + //cond_resched(); 577 + notes |= MADE_PROGRESS; 578 + goto reassess_streams; 579 + } 580 + 581 + if (notes & NEED_RETRY) 582 + goto need_retry; 583 + if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { 584 + trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); 585 + clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); 586 + wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE); 587 + } 588 + 589 + if (notes & NEED_REASSESS) { 590 + //cond_resched(); 591 + goto reassess_streams; 592 + } 593 + if (notes & MADE_PROGRESS) { 594 + //cond_resched(); 595 + goto reassess_streams; 596 + } 597 + 598 + out: 599 + netfs_put_group_many(wreq->group, wreq->nr_group_rel); 600 + wreq->nr_group_rel = 0; 601 + _leave(" = %x", notes); 602 + return; 603 + 604 + need_retry: 605 + /* Okay... We're going to have to retry one or both streams. Note 606 + * that any partially completed op will have had any wholly transferred 607 + * folios removed from it. 608 + */ 609 + _debug("retry"); 610 + netfs_retry_writes(wreq); 611 + goto out; 612 + } 613 + 614 + /* 615 + * Perform the collection of subrequests, folios and encryption buffers. 
616 + */ 617 + void netfs_write_collection_worker(struct work_struct *work) 618 + { 619 + struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); 620 + struct netfs_inode *ictx = netfs_inode(wreq->inode); 621 + size_t transferred; 622 + int s; 623 + 624 + _enter("R=%x", wreq->debug_id); 625 + 626 + netfs_see_request(wreq, netfs_rreq_trace_see_work); 627 + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { 628 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work); 629 + return; 630 + } 631 + 632 + netfs_collect_write_results(wreq); 633 + 634 + /* We're done when the app thread has finished posting subreqs and all 635 + * the queues in all the streams are empty. 636 + */ 637 + if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) { 638 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work); 639 + return; 640 + } 641 + smp_rmb(); /* Read ALL_QUEUED before lists. */ 642 + 643 + transferred = LONG_MAX; 644 + for (s = 0; s < NR_IO_STREAMS; s++) { 645 + struct netfs_io_stream *stream = &wreq->io_streams[s]; 646 + if (!stream->active) 647 + continue; 648 + if (!list_empty(&stream->subrequests)) { 649 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work); 650 + return; 651 + } 652 + if (stream->transferred < transferred) 653 + transferred = stream->transferred; 654 + } 655 + 656 + /* Okay, declare that all I/O is complete. */ 657 + wreq->transferred = transferred; 658 + trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); 659 + 660 + if (wreq->io_streams[1].active && 661 + wreq->io_streams[1].failed) { 662 + /* Cache write failure doesn't prevent writeback completion 663 + * unless we're in disconnected mode. 664 + */ 665 + ictx->ops->invalidate_cache(wreq); 666 + } 667 + 668 + if (wreq->cleanup) 669 + wreq->cleanup(wreq); 670 + 671 + if (wreq->origin == NETFS_DIO_WRITE && 672 + wreq->mapping->nrpages) { 673 + /* mmap may have got underfoot and we may now have folios 674 + * locally covering the region we just wrote. 
Attempt to 675 + * discard the folios, but leave in place any modified locally. 676 + * ->write_iter() is prevented from interfering by the DIO 677 + * counter. 678 + */ 679 + pgoff_t first = wreq->start >> PAGE_SHIFT; 680 + pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; 681 + invalidate_inode_pages2_range(wreq->mapping, first, last); 682 + } 683 + 684 + if (wreq->origin == NETFS_DIO_WRITE) 685 + inode_dio_end(wreq->inode); 686 + 687 + _debug("finished"); 688 + trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); 689 + clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); 690 + wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); 691 + 692 + if (wreq->iocb) { 693 + wreq->iocb->ki_pos += wreq->transferred; 694 + if (wreq->iocb->ki_complete) 695 + wreq->iocb->ki_complete( 696 + wreq->iocb, wreq->error ? wreq->error : wreq->transferred); 697 + wreq->iocb = VFS_PTR_POISON; 698 + } 699 + 700 + netfs_clear_subrequests(wreq, false); 701 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete); 702 + } 703 + 704 + /* 705 + * Wake the collection work item. 706 + */ 707 + void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) 708 + { 709 + if (!work_pending(&wreq->work)) { 710 + netfs_get_request(wreq, netfs_rreq_trace_get_work); 711 + if (!queue_work(system_unbound_wq, &wreq->work)) 712 + netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq); 713 + } 714 + } 715 + 716 + /** 717 + * netfs_write_subrequest_terminated - Note the termination of a write operation. 718 + * @_op: The I/O request that has terminated. 719 + * @transferred_or_error: The amount of data transferred or an error code. 720 + * @was_async: The termination was asynchronous 721 + * 722 + * This tells the library that a contributory write I/O operation has 723 + * terminated, one way or another, and that it should collect the results. 
724 + * 725 + * The caller indicates in @transferred_or_error the outcome of the operation, 726 + * supplying a positive value to indicate the number of bytes transferred or a 727 + * negative error code. The library will look after reissuing I/O operations 728 + * as appropriate and writing downloaded data to the cache. 729 + * 730 + * If @was_async is true, the caller might be running in softirq or interrupt 731 + * context and we can't sleep. 732 + * 733 + * When this is called, ownership of the subrequest is transferred back to the 734 + * library, along with a ref. 735 + * 736 + * Note that %_op is a void* so that the function can be passed to 737 + * kiocb::term_func without the need for a casting wrapper. 738 + */ 739 + void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 740 + bool was_async) 741 + { 742 + struct netfs_io_subrequest *subreq = _op; 743 + struct netfs_io_request *wreq = subreq->rreq; 744 + struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; 745 + 746 + _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 747 + 748 + switch (subreq->source) { 749 + case NETFS_UPLOAD_TO_SERVER: 750 + netfs_stat(&netfs_n_wh_upload_done); 751 + break; 752 + case NETFS_WRITE_TO_CACHE: 753 + netfs_stat(&netfs_n_wh_write_done); 754 + break; 755 + case NETFS_INVALID_WRITE: 756 + break; 757 + default: 758 + BUG(); 759 + } 760 + 761 + if (IS_ERR_VALUE(transferred_or_error)) { 762 + subreq->error = transferred_or_error; 763 + if (subreq->error == -EAGAIN) 764 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 765 + else 766 + set_bit(NETFS_SREQ_FAILED, &subreq->flags); 767 + trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write); 768 + 769 + switch (subreq->source) { 770 + case NETFS_WRITE_TO_CACHE: 771 + netfs_stat(&netfs_n_wh_write_failed); 772 + break; 773 + case NETFS_UPLOAD_TO_SERVER: 774 + netfs_stat(&netfs_n_wh_upload_failed); 775 + break; 776 + default: 777 + break; 778 + 
} 779 + trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause); 780 + set_bit(NETFS_RREQ_PAUSE, &wreq->flags); 781 + } else { 782 + if (WARN(transferred_or_error > subreq->len - subreq->transferred, 783 + "Subreq excess write: R=%x[%x] %zd > %zu - %zu", 784 + wreq->debug_id, subreq->debug_index, 785 + transferred_or_error, subreq->len, subreq->transferred)) 786 + transferred_or_error = subreq->len - subreq->transferred; 787 + 788 + subreq->error = 0; 789 + subreq->transferred += transferred_or_error; 790 + 791 + if (subreq->transferred < subreq->len) 792 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 793 + } 794 + 795 + trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 796 + 797 + clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 798 + wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS); 799 + 800 + /* If we are at the head of the queue, wake up the collector, 801 + * transferring a ref to it if we were the ones to do so. 802 + */ 803 + if (list_is_first(&subreq->rreq_link, &stream->subrequests)) 804 + netfs_wake_write_collector(wreq, was_async); 805 + 806 + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 807 + } 808 + EXPORT_SYMBOL(netfs_write_subrequest_terminated);
+684
fs/netfs/write_issue.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Network filesystem high-level (buffered) writeback. 3 + * 4 + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 5 + * Written by David Howells (dhowells@redhat.com) 6 + * 7 + * 8 + * To support network filesystems with local caching, we manage a situation 9 + * that can be envisioned like the following: 10 + * 11 + * +---+---+-----+-----+---+----------+ 12 + * Folios: | | | | | | | 13 + * +---+---+-----+-----+---+----------+ 14 + * 15 + * +------+------+ +----+----+ 16 + * Upload: | | |.....| | | 17 + * (Stream 0) +------+------+ +----+----+ 18 + * 19 + * +------+------+------+------+------+ 20 + * Cache: | | | | | | 21 + * (Stream 1) +------+------+------+------+------+ 22 + * 23 + * Where we have a sequence of folios of varying sizes that we need to overlay 24 + * with multiple parallel streams of I/O requests, where the I/O requests in a 25 + * stream may also be of various sizes (in cifs, for example, the sizes are 26 + * negotiated with the server; in something like ceph, they may represent the 27 + * sizes of storage objects). 28 + * 29 + * The sequence in each stream may contain gaps and noncontiguous subrequests 30 + * may be glued together into single vectored write RPCs. 31 + */ 32 + 33 + #include <linux/export.h> 34 + #include <linux/fs.h> 35 + #include <linux/mm.h> 36 + #include <linux/pagemap.h> 37 + #include "internal.h" 38 + 39 + /* 40 + * Kill all dirty folios in the event of an unrecoverable error, starting with 41 + * a locked folio we've already obtained from writeback_iter(). 
42 + */ 43 + static void netfs_kill_dirty_pages(struct address_space *mapping, 44 + struct writeback_control *wbc, 45 + struct folio *folio) 46 + { 47 + int error = 0; 48 + 49 + do { 50 + enum netfs_folio_trace why = netfs_folio_trace_kill; 51 + struct netfs_group *group = NULL; 52 + struct netfs_folio *finfo = NULL; 53 + void *priv; 54 + 55 + priv = folio_detach_private(folio); /* Take over whatever was attached to the folio so that it can be released below. */ 56 + if (priv) { 57 + finfo = __netfs_folio_info(priv); 58 + if (finfo) { 59 + /* Kill folio from streaming write. */ 60 + group = finfo->netfs_group; 61 + why = netfs_folio_trace_kill_s; 62 + } else { 63 + group = priv; 64 + if (group == NETFS_FOLIO_COPY_TO_CACHE) { 65 + /* Kill copy-to-cache folio */ 66 + why = netfs_folio_trace_kill_cc; 67 + group = NULL; 68 + } else { 69 + /* Kill folio with group */ 70 + why = netfs_folio_trace_kill_g; 71 + } 72 + } 73 + } 74 + 75 + trace_netfs_folio(folio, why); 76 + 77 + folio_start_writeback(folio); /* Cycle the folio through the writeback state without issuing any I/O for it. */ 78 + folio_unlock(folio); 79 + folio_end_writeback(folio); 80 + 81 + netfs_put_group(group); 82 + kfree(finfo); 83 + 84 + } while ((folio = writeback_iter(mapping, wbc, folio, &error))); /* Repeat for each further folio that writeback_iter() hands back. */ 85 + } 86 + 87 + /* 88 + * Create a write request and set it up appropriately for the origin type. 
89 + */ 90 + struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, 91 + struct file *file, 92 + loff_t start, 93 + enum netfs_io_origin origin) 94 + { 95 + struct netfs_io_request *wreq; 96 + struct netfs_inode *ictx; 97 + 98 + wreq = netfs_alloc_request(mapping, file, start, 0, origin); 99 + if (IS_ERR(wreq)) 100 + return wreq; /* Allocation failed: hand the error pointer straight back to the caller. */ 101 + 102 + _enter("R=%x", wreq->debug_id); 103 + 104 + ictx = netfs_inode(wreq->inode); 105 + if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) 106 + fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); 107 + 108 + wreq->contiguity = wreq->start; 109 + wreq->cleaned_to = wreq->start; 110 + INIT_WORK(&wreq->work, netfs_write_collection_worker); 111 + 112 + wreq->io_streams[0].stream_nr = 0; /* Stream 0 uploads to the server using the filesystem's own prepare/issue ops; its avail flag is not set here. */ 113 + wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER; 114 + wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; 115 + wreq->io_streams[0].issue_write = ictx->ops->issue_write; 116 + wreq->io_streams[0].collected_to = start; 117 + wreq->io_streams[0].transferred = LONG_MAX; 118 + 119 + wreq->io_streams[1].stream_nr = 1; /* Stream 1 writes to the cache; it is only marked available if the cache resources are valid. */ 120 + wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; 121 + wreq->io_streams[1].collected_to = start; 122 + wreq->io_streams[1].transferred = LONG_MAX; 123 + if (fscache_resources_valid(&wreq->cache_resources)) { 124 + wreq->io_streams[1].avail = true; 125 + wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq; 126 + wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write; 127 + } 128 + 129 + return wreq; 130 + } 131 + 132 + /** 133 + * netfs_prepare_write_failed - Note write preparation failed 134 + * @subreq: The subrequest to mark 135 + * 136 + * Mark a subrequest to note that preparation for write failed. 
137 + */ 138 + void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq) 139 + { 140 + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 141 + trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed); 142 + } 143 + EXPORT_SYMBOL(netfs_prepare_write_failed); 144 + 145 + /* 146 + * Prepare a write subrequest. We need to allocate a new subrequest 147 + * if we don't have one. 148 + */ 149 + static void netfs_prepare_write(struct netfs_io_request *wreq, 150 + struct netfs_io_stream *stream, 151 + loff_t start) 152 + { 153 + struct netfs_io_subrequest *subreq; 154 + 155 + subreq = netfs_alloc_subrequest(wreq); /* NOTE(review): the result is dereferenced without a NULL check - presumably the allocator cannot fail here; confirm. */ 156 + subreq->source = stream->source; 157 + subreq->start = start; 158 + subreq->max_len = ULONG_MAX; 159 + subreq->max_nr_segs = INT_MAX; /* Start with no practical limits; the upload case below caps max_len at wsize. */ 160 + subreq->stream_nr = stream->stream_nr; 161 + 162 + _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); 163 + 164 + trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 165 + refcount_read(&subreq->ref), 166 + netfs_sreq_trace_new); 167 + 168 + trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); 169 + 170 + switch (stream->source) { 171 + case NETFS_UPLOAD_TO_SERVER: 172 + netfs_stat(&netfs_n_wh_upload); 173 + subreq->max_len = wreq->wsize; 174 + break; 175 + case NETFS_WRITE_TO_CACHE: 176 + netfs_stat(&netfs_n_wh_write); 177 + break; 178 + default: 179 + WARN_ON_ONCE(1); 180 + break; 181 + } 182 + 183 + if (stream->prepare_write) 184 + stream->prepare_write(subreq); /* Optional per-stream hook, invoked before the subrequest is queued. */ 185 + 186 + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 187 + 188 + /* We add to the end of the list whilst the collector may be walking 189 + * the list. The collector only goes nextwards and uses the lock to 190 + * remove entries off of the front. 
191 + */ 192 + spin_lock(&wreq->lock); 193 + list_add_tail(&subreq->rreq_link, &stream->subrequests); 194 + if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { 195 + stream->front = subreq; 196 + if (!stream->active) { 197 + stream->collected_to = stream->front->start; 198 + /* Write list pointers before active flag */ 199 + smp_store_release(&stream->active, true); 200 + } 201 + } 202 + 203 + spin_unlock(&wreq->lock); 204 + 205 + stream->construct = subreq; /* This is now the subrequest that netfs_advance_write() will extend. */ 206 + } 207 + 208 + /* 209 + * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O 210 + * operation. The operation may be asynchronous and should call 211 + * netfs_write_subrequest_terminated() when complete. 212 + */ 213 + static void netfs_do_issue_write(struct netfs_io_stream *stream, 214 + struct netfs_io_subrequest *subreq) 215 + { 216 + struct netfs_io_request *wreq = subreq->rreq; 217 + 218 + _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); 219 + 220 + if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 221 + return netfs_write_subrequest_terminated(subreq, subreq->error, false); /* A subrequest already marked failed is terminated with its recorded error instead of being issued. */ 222 + 223 + // TODO: Use encrypted buffer 224 + if (test_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags)) { 225 + subreq->io_iter = wreq->io_iter; /* Copy the request's iterator, then narrow it to the part of this subrequest not yet transferred. */ 226 + iov_iter_advance(&subreq->io_iter, 227 + subreq->start + subreq->transferred - wreq->start); 228 + iov_iter_truncate(&subreq->io_iter, 229 + subreq->len - subreq->transferred); 230 + } else { /* Otherwise build an iterator over the mapping's pages for the untransferred range. */ 231 + iov_iter_xarray(&subreq->io_iter, ITER_SOURCE, &wreq->mapping->i_pages, 232 + subreq->start + subreq->transferred, 233 + subreq->len - subreq->transferred); 234 + } 235 + 236 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 237 + stream->issue_write(subreq); 238 + } 239 + 240 + void netfs_reissue_write(struct netfs_io_stream *stream, 241 + struct netfs_io_subrequest *subreq) 242 + { 243 + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); /* Mark the subrequest in progress again, then dispatch it. */ 244 + netfs_do_issue_write(stream, subreq); 245 + } 246 + 247 + static void 
netfs_issue_write(struct netfs_io_request *wreq, 248 + struct netfs_io_stream *stream) 249 + { 250 + struct netfs_io_subrequest *subreq = stream->construct; 251 + 252 + if (!subreq) 253 + return; 254 + stream->construct = NULL; /* Detach first so that a fresh subrequest gets prepared for any further data. */ 255 + 256 + if (subreq->start + subreq->len > wreq->start + wreq->submitted) 257 + wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start; /* Grow the request so that it reaches the end of this subrequest. */ 258 + netfs_do_issue_write(stream, subreq); 259 + } 260 + 261 + /* 262 + * Add data to the write subrequest, dispatching each as we fill it up or if it 263 + * is discontiguous with the previous. We only fill one part at a time so that 264 + * we can avoid overrunning the credits obtained (cifs) and try to parallelise 265 + * content-crypto preparation with network writes. 266 + */ 267 + int netfs_advance_write(struct netfs_io_request *wreq, 268 + struct netfs_io_stream *stream, 269 + loff_t start, size_t len, bool to_eof) 270 + { 271 + struct netfs_io_subrequest *subreq = stream->construct; 272 + size_t part; 273 + 274 + if (!stream->avail) { 275 + _leave("no write"); 276 + return len; /* Stream unavailable: report the whole length as consumed. NOTE(review): size_t values are returned through an int here - confirm lengths never exceed INT_MAX. */ 277 + } 278 + 279 + _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); 280 + 281 + if (subreq && start != subreq->start + subreq->len) { 282 + netfs_issue_write(wreq, stream); /* The new data does not follow on from the subrequest under construction, so issue that one first. */ 283 + subreq = NULL; 284 + } 285 + 286 + if (!stream->construct) 287 + netfs_prepare_write(wreq, stream, start); 288 + subreq = stream->construct; 289 + 290 + part = min(subreq->max_len - subreq->len, len); /* Take only as much as still fits within max_len. */ 291 + _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); 292 + subreq->len += part; 293 + subreq->nr_segs++; 294 + 295 + if (subreq->len >= subreq->max_len || 296 + subreq->nr_segs >= subreq->max_nr_segs || 297 + to_eof) { 298 + netfs_issue_write(wreq, stream); /* Issue once the subrequest is full, has hit its segment limit, or the caller passed to_eof. */ 299 + subreq = NULL; 300 + } 301 + 302 + return part; 303 + } 304 + 305 + /* 306 + * Write some of a pending folio data back to the server. 
307 + */ 308 + static int netfs_write_folio(struct netfs_io_request *wreq, 309 + struct writeback_control *wbc, 310 + struct folio *folio) 311 + { 312 + struct netfs_io_stream *upload = &wreq->io_streams[0]; 313 + struct netfs_io_stream *cache = &wreq->io_streams[1]; 314 + struct netfs_io_stream *stream; 315 + struct netfs_group *fgroup; /* TODO: Use this with ceph */ 316 + struct netfs_folio *finfo; 317 + size_t fsize = folio_size(folio), flen = fsize, foff = 0; 318 + loff_t fpos = folio_pos(folio), i_size; 319 + bool to_eof = false, streamw = false; 320 + bool debug = false; 321 + 322 + _enter(""); 323 + 324 + /* netfs_perform_write() may shift i_size around the page or from out 325 + * of the page to beyond it, but cannot move i_size into or through the 326 + * page since we have it locked. 327 + */ 328 + i_size = i_size_read(wreq->inode); 329 + 330 + if (fpos >= i_size) { 331 + /* mmap beyond eof. */ 332 + _debug("beyond eof"); 333 + folio_start_writeback(folio); 334 + folio_unlock(folio); 335 + wreq->nr_group_rel += netfs_folio_written_back(folio); 336 + netfs_put_group_many(wreq->group, wreq->nr_group_rel); 337 + wreq->nr_group_rel = 0; 338 + return 0; 339 + } 340 + 341 + if (fpos + fsize > wreq->i_size) 342 + wreq->i_size = i_size; 343 + 344 + fgroup = netfs_folio_group(folio); 345 + finfo = netfs_folio_info(folio); 346 + if (finfo) { 347 + foff = finfo->dirty_offset; 348 + flen = foff + finfo->dirty_len; 349 + streamw = true; /* The folio carries streaming-write info: only the range starting at dirty_offset and running for dirty_len is considered. */ 350 + } 351 + 352 + if (wreq->origin == NETFS_WRITETHROUGH) { 353 + to_eof = false; 354 + if (flen > i_size - fpos) 355 + flen = i_size - fpos; 356 + } else if (flen > i_size - fpos) { 357 + flen = i_size - fpos; 358 + if (!streamw) 359 + folio_zero_segment(folio, flen, fsize); 360 + to_eof = true; 361 + } else if (flen == i_size - fpos) { 362 + to_eof = true; 363 + } 364 + flen -= foff; /* Up to here flen was an end offset within the folio; convert it to a length. */ 365 + 366 + _debug("folio %zx %zx %zx", foff, flen, fsize); 367 + 368 + /* Deal with discontinuities in the stream of dirty pages. 
These can 369 + * arise from a number of sources: 370 + * 371 + * (1) Intervening non-dirty pages from random-access writes, multiple 372 + * flushers writing back different parts simultaneously and manual 373 + * syncing. 374 + * 375 + * (2) Partially-written pages from write-streaming. 376 + * 377 + * (3) Pages that belong to a different write-back group (eg. Ceph 378 + * snapshots). 379 + * 380 + * (4) Actually-clean pages that were marked for write to the cache 381 + * when they were read. Note that these appear as a special 382 + * write-back group. 383 + */ 384 + if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { 385 + netfs_issue_write(wreq, upload); 386 + } else if (fgroup != wreq->group) { 387 + /* We can't write this page to the server yet. */ 388 + kdebug("wrong group"); 389 + folio_redirty_for_writepage(wbc, folio); 390 + folio_unlock(folio); 391 + netfs_issue_write(wreq, upload); 392 + netfs_issue_write(wreq, cache); 393 + return 0; /* The folio was redirtied and unlocked, and whatever was pending on both streams has been issued. */ 394 + } 395 + 396 + if (foff > 0) 397 + netfs_issue_write(wreq, upload); 398 + if (streamw) 399 + netfs_issue_write(wreq, cache); 400 + 401 + /* Flip the page to the writeback state and unlock. If we're called 402 + * from write-through, then the page has already been put into the wb 403 + * state. 
404 + */ 405 + if (wreq->origin == NETFS_WRITEBACK) 406 + folio_start_writeback(folio); 407 + folio_unlock(folio); 408 + 409 + if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { 410 + if (!fscache_resources_valid(&wreq->cache_resources)) { 411 + trace_netfs_folio(folio, netfs_folio_trace_cancel_copy); 412 + netfs_issue_write(wreq, upload); 413 + netfs_folio_written_back(folio); 414 + return 0; 415 + } 416 + trace_netfs_folio(folio, netfs_folio_trace_store_copy); 417 + } else if (!upload->construct) { 418 + trace_netfs_folio(folio, netfs_folio_trace_store); 419 + } else { 420 + trace_netfs_folio(folio, netfs_folio_trace_store_plus); 421 + } 422 + 423 + /* Move the submission point forward to allow for write-streaming data 424 + * not starting at the front of the page. We don't do write-streaming 425 + * with the cache as the cache requires DIO alignment. 426 + * 427 + * Also skip uploading for data that's been read and just needs copying 428 + * to the cache. 429 + */ 430 + for (int s = 0; s < NR_IO_STREAMS; s++) { 431 + stream = &wreq->io_streams[s]; 432 + stream->submit_max_len = fsize; 433 + stream->submit_off = foff; 434 + stream->submit_len = flen; 435 + if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) || 436 + (stream->source == NETFS_UPLOAD_TO_SERVER && 437 + fgroup == NETFS_FOLIO_COPY_TO_CACHE)) { 438 + stream->submit_off = UINT_MAX; 439 + stream->submit_len = 0; 440 + stream->submit_max_len = 0; /* This stream takes nothing from this folio. */ 441 + } 442 + } 443 + 444 + /* Attach the folio to one or more subrequests. For a big folio, we 445 + * could end up with thousands of subrequests if the wsize is small - 446 + * but we might need to wait during the creation of subrequests for 447 + * network resources (eg. SMB credits). 448 + */ 449 + for (;;) { 450 + ssize_t part; 451 + size_t lowest_off = ULONG_MAX; 452 + int choose_s = -1; 453 + 454 + /* Always add to the lowest-submitted stream first. 
*/ 455 + for (int s = 0; s < NR_IO_STREAMS; s++) { 456 + stream = &wreq->io_streams[s]; 457 + if (stream->submit_len > 0 && 458 + stream->submit_off < lowest_off) { 459 + lowest_off = stream->submit_off; 460 + choose_s = s; 461 + } 462 + } 463 + 464 + if (choose_s < 0) 465 + break; /* No stream has anything left to submit for this folio. */ 466 + stream = &wreq->io_streams[choose_s]; 467 + 468 + part = netfs_advance_write(wreq, stream, fpos + stream->submit_off, 469 + stream->submit_len, to_eof); 470 + atomic64_set(&wreq->issued_to, fpos + stream->submit_off); 471 + stream->submit_off += part; 472 + stream->submit_max_len -= part; 473 + if (part > stream->submit_len) 474 + stream->submit_len = 0; 475 + else 476 + stream->submit_len -= part; 477 + if (part > 0) 478 + debug = true; 479 + } 480 + 481 + atomic64_set(&wreq->issued_to, fpos + fsize); 482 + 483 + if (!debug) 484 + kdebug("R=%x: No submit", wreq->debug_id); 485 + 486 + if (flen < fsize) 487 + for (int s = 0; s < NR_IO_STREAMS; s++) 488 + netfs_issue_write(wreq, &wreq->io_streams[s]); /* Less than the whole folio was submitted, so issue what is pending on every stream. */ 489 + 490 + _leave(" = 0"); 491 + return 0; 492 + } 493 + 494 + /* 495 + * Write some of the pending data back to the server 496 + */ 497 + int netfs_writepages(struct address_space *mapping, 498 + struct writeback_control *wbc) 499 + { 500 + struct netfs_inode *ictx = netfs_inode(mapping->host); 501 + struct netfs_io_request *wreq = NULL; 502 + struct folio *folio; 503 + int error = 0; 504 + 505 + if (wbc->sync_mode == WB_SYNC_ALL) 506 + mutex_lock(&ictx->wb_lock); 507 + else if (!mutex_trylock(&ictx->wb_lock)) 508 + return 0; /* WB_SYNC_ALL waits for wb_lock; any other mode gives up quietly if the lock is contended. */ 509 + 510 + /* Need the first folio to be able to set up the op. 
*/ 511 + folio = writeback_iter(mapping, wbc, NULL, &error); 512 + if (!folio) 513 + goto out; /* Nothing was returned to write back. */ 514 + 515 + wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK); 516 + if (IS_ERR(wreq)) { 517 + error = PTR_ERR(wreq); 518 + goto couldnt_start; /* No request could be created: the folios are disposed of via netfs_kill_dirty_pages(). */ 519 + } 520 + 521 + trace_netfs_write(wreq, netfs_write_trace_writeback); 522 + netfs_stat(&netfs_n_wh_writepages); 523 + 524 + do { 525 + _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); 526 + 527 + /* It appears we don't have to handle cyclic writeback wrapping. */ 528 + WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); 529 + 530 + if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE && 531 + unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) { 532 + set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 533 + wreq->netfs_ops->begin_writeback(wreq); /* First folio that is not copy-to-cache only: set the upload flag and call the filesystem's begin_writeback hook once. */ 534 + } 535 + 536 + error = netfs_write_folio(wreq, wbc, folio); 537 + if (error < 0) 538 + break; 539 + } while ((folio = writeback_iter(mapping, wbc, folio, &error))); 540 + 541 + for (int s = 0; s < NR_IO_STREAMS; s++) 542 + netfs_issue_write(wreq, &wreq->io_streams[s]); /* Issue whatever is still under construction on each stream. */ 543 + smp_wmb(); /* Write lists before ALL_QUEUED. */ 544 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 545 + 546 + mutex_unlock(&ictx->wb_lock); 547 + 548 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 549 + _leave(" = %d", error); 550 + return error; 551 + 552 + couldnt_start: 553 + netfs_kill_dirty_pages(mapping, wbc, folio); 554 + out: 555 + mutex_unlock(&ictx->wb_lock); 556 + _leave(" = %d", error); 557 + return error; 558 + } 559 + EXPORT_SYMBOL(netfs_writepages); 560 + 561 + /* 562 + * Begin a write operation for writing through the pagecache. 
563 + */ 564 + struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len) 565 + { 566 + struct netfs_io_request *wreq = NULL; 567 + struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp)); /* NOTE(review): the len parameter is not referenced anywhere in this function. */ 568 + 569 + mutex_lock(&ictx->wb_lock); /* Dropped below on failure; on success it stays held until netfs_end_writethrough() unlocks it. */ 570 + 571 + wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, 572 + iocb->ki_pos, NETFS_WRITETHROUGH); 573 + if (IS_ERR(wreq)) { 574 + mutex_unlock(&ictx->wb_lock); 575 + return wreq; 576 + } 577 + 578 + wreq->io_streams[0].avail = true; /* Enable the upload stream for this request. */ 579 + trace_netfs_write(wreq, netfs_write_trace_writethrough); 580 + return wreq; 581 + } 582 + 583 + /* 584 + * Advance the state of the write operation used when writing through the 585 + * pagecache. Data has been copied into the pagecache that we need to append 586 + * to the request. If we've added more than wsize then we need to create a new 587 + * subrequest. 588 + */ 589 + int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 590 + struct folio *folio, size_t copied, bool to_page_end, 591 + struct folio **writethrough_cache) 592 + { 593 + _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", 594 + wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); 595 + 596 + if (!*writethrough_cache) { 597 + if (folio_test_dirty(folio)) 598 + /* Sigh. mmap. */ 599 + folio_clear_dirty_for_io(folio); 600 + 601 + /* We can make multiple writes to the folio... */ 602 + folio_start_writeback(folio); 603 + if (wreq->len == 0) 604 + trace_netfs_folio(folio, netfs_folio_trace_wthru); 605 + else 606 + trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); 607 + *writethrough_cache = folio; /* Remember the folio so that later calls on it skip this setup. */ 608 + } 609 + 610 + wreq->len += copied; 611 + if (!to_page_end) 612 + return 0; /* The folio has not been written to its end yet: just account for the data. */ 613 + 614 + *writethrough_cache = NULL; 615 + return netfs_write_folio(wreq, wbc, folio); 616 + } 617 + 618 + /* 619 + * End a write operation used when writing through the pagecache. 
620 + */ 621 + int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 622 + struct folio *writethrough_cache) 623 + { 624 + struct netfs_inode *ictx = netfs_inode(wreq->inode); 625 + int ret; 626 + 627 + _enter("R=%x", wreq->debug_id); 628 + 629 + if (writethrough_cache) 630 + netfs_write_folio(wreq, wbc, writethrough_cache); /* Submit the folio that was left pending; the return value is not checked here. */ 631 + 632 + netfs_issue_write(wreq, &wreq->io_streams[0]); 633 + netfs_issue_write(wreq, &wreq->io_streams[1]); 634 + smp_wmb(); /* Write lists before ALL_QUEUED. */ 635 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 636 + 637 + mutex_unlock(&ictx->wb_lock); 638 + 639 + ret = wreq->error; /* Sample the error before dropping our reference to the request. */ 640 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 641 + return ret; 642 + } 643 + 644 + /* 645 + * Write data to the server without going through the pagecache and without 646 + * writing it to the local cache. 647 + */ 648 + int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len) 649 + { 650 + struct netfs_io_stream *upload = &wreq->io_streams[0]; 651 + ssize_t part; 652 + loff_t start = wreq->start; 653 + int error = 0; /* NOTE(review): error is never assigned again and may_wait is never referenced, so this function always returns 0. */ 654 + 655 + _enter("%zx", len); 656 + 657 + if (wreq->origin == NETFS_DIO_WRITE) 658 + inode_dio_begin(wreq->inode); 659 + 660 + while (len) { 661 + // TODO: Prepare content encryption 662 + 663 + _debug("unbuffered %zx", len); 664 + part = netfs_advance_write(wreq, upload, start, len, false); 665 + start += part; 666 + len -= part; 667 + if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { 668 + trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause); 669 + wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE); /* Block until NETFS_RREQ_PAUSE is cleared. */ 670 + } 671 + if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) 672 + break; 673 + } 674 + 675 + netfs_issue_write(wreq, upload); 676 + 677 + smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ 678 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 679 + if (list_empty(&upload->subrequests)) 680 + netfs_wake_write_collector(wreq, false); /* Nothing is queued on the upload stream, so wake the collector directly. */ 681 + 682 + _leave(" = %d", error); 683 + return error; 684 + }
+4 -4
fs/nfs/file.c
··· 433 433 return; 434 434 /* Cancel any unstarted writes on this page */ 435 435 nfs_wb_folio_cancel(inode, folio); 436 - folio_wait_fscache(folio); 436 + folio_wait_private_2(folio); /* [DEPRECATED] */ 437 437 trace_nfs_invalidate_folio(inode, folio); 438 438 } 439 439 ··· 500 500 dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n", 501 501 inode->i_ino, folio_pos(folio)); 502 502 503 - folio_wait_fscache(folio); 503 + folio_wait_private_2(folio); /* [DEPRECATED] */ 504 504 ret = nfs_wb_folio(inode, folio); 505 505 trace_nfs_launder_folio_done(inode, folio, ret); 506 506 return ret; ··· 593 593 sb_start_pagefault(inode->i_sb); 594 594 595 595 /* make sure the cache has finished storing the page */ 596 - if (folio_test_fscache(folio) && 597 - folio_wait_fscache_killable(folio) < 0) { 596 + if (folio_test_private_2(folio) && /* [DEPRECATED] */ 597 + folio_wait_private_2_killable(folio) < 0) { 598 598 ret = VM_FAULT_RETRY; 599 599 goto out; 600 600 }
+4 -2
fs/nfs/fscache.h
··· 81 81 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) 82 82 { 83 83 netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); 84 + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ 85 + __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags); 84 86 } 85 87 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); 86 88 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); ··· 103 101 104 102 static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) 105 103 { 106 - if (folio_test_fscache(folio)) { 104 + if (folio_test_private_2(folio)) { /* [DEPRECATED] */ 107 105 if (current_is_kswapd() || !(gfp & __GFP_FS)) 108 106 return false; 109 - folio_wait_fscache(folio); 107 + folio_wait_private_2(folio); 110 108 } 111 109 fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host))); 112 110 return true;
+2 -2
fs/nfs/write.c
··· 2120 2120 if (folio_test_private(src)) 2121 2121 return -EBUSY; 2122 2122 2123 - if (folio_test_fscache(src)) { 2123 + if (folio_test_private_2(src)) { /* [DEPRECATED] */ 2124 2124 if (mode == MIGRATE_ASYNC) 2125 2125 return -EBUSY; 2126 - folio_wait_fscache(src); 2126 + folio_wait_private_2(src); 2127 2127 } 2128 2128 2129 2129 return migrate_folio(mapping, dst, src, mode);
+1
fs/smb/client/Kconfig
··· 2 2 config CIFS 3 3 tristate "SMB3 and CIFS support (advanced network filesystem)" 4 4 depends on INET 5 + select NETFS_SUPPORT 5 6 select NLS 6 7 select NLS_UCS2_UTILS 7 8 select CRYPTO
+64 -60
fs/smb/client/cifsfs.c
··· 371 371 static struct kmem_cache *cifs_req_cachep; 372 372 static struct kmem_cache *cifs_mid_cachep; 373 373 static struct kmem_cache *cifs_sm_req_cachep; 374 + static struct kmem_cache *cifs_io_request_cachep; 375 + static struct kmem_cache *cifs_io_subrequest_cachep; 374 376 mempool_t *cifs_sm_req_poolp; 375 377 mempool_t *cifs_req_poolp; 376 378 mempool_t *cifs_mid_poolp; 379 + mempool_t cifs_io_request_pool; 380 + mempool_t cifs_io_subrequest_pool; 377 381 378 382 static struct inode * 379 383 cifs_alloc_inode(struct super_block *sb) ··· 990 986 return root; 991 987 } 992 988 993 - 994 - static ssize_t 995 - cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) 996 - { 997 - ssize_t rc; 998 - struct inode *inode = file_inode(iocb->ki_filp); 999 - 1000 - if (iocb->ki_flags & IOCB_DIRECT) 1001 - return cifs_user_readv(iocb, iter); 1002 - 1003 - rc = cifs_revalidate_mapping(inode); 1004 - if (rc) 1005 - return rc; 1006 - 1007 - return generic_file_read_iter(iocb, iter); 1008 - } 1009 - 1010 - static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 1011 - { 1012 - struct inode *inode = file_inode(iocb->ki_filp); 1013 - struct cifsInodeInfo *cinode = CIFS_I(inode); 1014 - ssize_t written; 1015 - int rc; 1016 - 1017 - if (iocb->ki_filp->f_flags & O_DIRECT) { 1018 - written = cifs_user_writev(iocb, from); 1019 - if (written > 0 && CIFS_CACHE_READ(cinode)) { 1020 - cifs_zap_mapping(inode); 1021 - cifs_dbg(FYI, 1022 - "Set no oplock for inode=%p after a write operation\n", 1023 - inode); 1024 - cinode->oplock = 0; 1025 - } 1026 - return written; 1027 - } 1028 - 1029 - written = cifs_get_writer(cinode); 1030 - if (written) 1031 - return written; 1032 - 1033 - written = generic_file_write_iter(iocb, from); 1034 - 1035 - if (CIFS_CACHE_WRITE(CIFS_I(inode))) 1036 - goto out; 1037 - 1038 - rc = filemap_fdatawrite(inode->i_mapping); 1039 - if (rc) 1040 - cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", 1041 - rc, inode); 1042 - 
1043 - out: 1044 - cifs_put_writer(cinode); 1045 - return written; 1046 - } 1047 - 1048 989 static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) 1049 990 { 1050 991 struct cifsFileInfo *cfile = file->private_data; ··· 1291 1342 rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); 1292 1343 if (rc) 1293 1344 goto unlock; 1345 + if (fend > target_cifsi->netfs.zero_point) 1346 + target_cifsi->netfs.zero_point = fend + 1; 1294 1347 1295 1348 /* Discard all the folios that overlap the destination region. */ 1296 1349 cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend); ··· 1311 1360 fscache_resize_cookie(cifs_inode_cookie(target_inode), 1312 1361 new_size); 1313 1362 } 1363 + if (rc == 0 && new_size > target_cifsi->netfs.zero_point) 1364 + target_cifsi->netfs.zero_point = new_size; 1314 1365 } 1315 1366 1316 1367 /* force revalidate of size and timestamps of target file now ··· 1404 1451 rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); 1405 1452 if (rc) 1406 1453 goto unlock; 1454 + if (fend > target_cifsi->netfs.zero_point) 1455 + target_cifsi->netfs.zero_point = fend + 1; 1407 1456 1408 1457 /* Discard all the folios that overlap the destination region. 
*/ 1409 1458 truncate_inode_pages_range(&target_inode->i_data, fstart, fend); ··· 1522 1567 }; 1523 1568 1524 1569 const struct file_operations cifs_file_direct_ops = { 1525 - .read_iter = cifs_direct_readv, 1526 - .write_iter = cifs_direct_writev, 1570 + .read_iter = netfs_unbuffered_read_iter, 1571 + .write_iter = netfs_file_write_iter, 1527 1572 .open = cifs_open, 1528 1573 .release = cifs_close, 1529 1574 .lock = cifs_lock, ··· 1578 1623 }; 1579 1624 1580 1625 const struct file_operations cifs_file_direct_nobrl_ops = { 1581 - .read_iter = cifs_direct_readv, 1582 - .write_iter = cifs_direct_writev, 1626 + .read_iter = netfs_unbuffered_read_iter, 1627 + .write_iter = netfs_file_write_iter, 1583 1628 .open = cifs_open, 1584 1629 .release = cifs_close, 1585 1630 .fsync = cifs_fsync, ··· 1754 1799 kmem_cache_destroy(cifs_mid_cachep); 1755 1800 } 1756 1801 1802 + static int cifs_init_netfs(void) 1803 + { /* Create the slab caches and the embedded mempools for cifs_io_request and cifs_io_subrequest. Returns 0 on success or -ENOMEM after unwinding whatever had been set up. */ 1804 + cifs_io_request_cachep = 1805 + kmem_cache_create("cifs_io_request", 1806 + sizeof(struct cifs_io_request), 0, 1807 + SLAB_HWCACHE_ALIGN, NULL); 1808 + if (!cifs_io_request_cachep) 1809 + goto nomem_req; 1810 + 1811 + if (mempool_init_slab_pool(&cifs_io_request_pool, 100, cifs_io_request_cachep) < 0) 1812 + goto nomem_reqpool; 1813 + 1814 + cifs_io_subrequest_cachep = 1815 + kmem_cache_create("cifs_io_subrequest", 1816 + sizeof(struct cifs_io_subrequest), 0, 1817 + SLAB_HWCACHE_ALIGN, NULL); 1818 + if (!cifs_io_subrequest_cachep) 1819 + goto nomem_subreq; 1820 + 1821 + if (mempool_init_slab_pool(&cifs_io_subrequest_pool, 100, cifs_io_subrequest_cachep) < 0) 1822 + goto nomem_subreqpool; 1823 + 1824 + return 0; 1825 + 1826 + nomem_subreqpool: 1827 + kmem_cache_destroy(cifs_io_subrequest_cachep); 1828 + nomem_subreq: 1829 + mempool_exit(&cifs_io_request_pool); /* The pool is an embedded mempool_t set up by mempool_init_slab_pool(), so it is torn down with mempool_exit(); mempool_destroy() would also kfree() the pool object itself. */ 1830 + nomem_reqpool: 1831 + kmem_cache_destroy(cifs_io_request_cachep); 1832 + nomem_req: 1833 + return -ENOMEM; 1834 + } 1835 + 1836 + static void cifs_destroy_netfs(void) 1837 + { /* Undo cifs_init_netfs(): release each embedded pool's elements before destroying the slab cache that backs it. */ 1838 + 
mempool_exit(&cifs_io_subrequest_pool); 1839 + kmem_cache_destroy(cifs_io_subrequest_cachep); 1840 + mempool_exit(&cifs_io_request_pool); 1841 + kmem_cache_destroy(cifs_io_request_cachep); 1842 + } 1843 + 1757 1844 static int __init 1758 1845 init_cifs(void) 1759 1846 { ··· 1900 1903 if (rc) 1901 1904 goto out_destroy_deferredclose_wq; 1902 1905 1903 - rc = init_mids(); 1906 + rc = cifs_init_netfs(); 1904 1907 if (rc) 1905 1908 goto out_destroy_inodecache; 1909 + 1910 + rc = init_mids(); 1911 + if (rc) 1912 + goto out_destroy_netfs; 1906 1913 1907 1914 rc = cifs_init_request_bufs(); 1908 1915 if (rc) ··· 1962 1961 cifs_destroy_request_bufs(); 1963 1962 out_destroy_mids: 1964 1963 destroy_mids(); 1964 + out_destroy_netfs: 1965 + cifs_destroy_netfs(); 1965 1966 out_destroy_inodecache: 1966 1967 cifs_destroy_inodecache(); 1967 1968 out_destroy_deferredclose_wq: ··· 2002 1999 #endif 2003 2000 cifs_destroy_request_bufs(); 2004 2001 destroy_mids(); 2002 + cifs_destroy_netfs(); 2005 2003 cifs_destroy_inodecache(); 2006 2004 destroy_workqueue(deferredclose_wq); 2007 2005 destroy_workqueue(cifsoplockd_wq);
+3 -8
fs/smb/client/cifsfs.h
··· 69 69 extern int cifs_revalidate_dentry_attr(struct dentry *); 70 70 extern int cifs_revalidate_file(struct file *filp); 71 71 extern int cifs_revalidate_dentry(struct dentry *); 72 - extern int cifs_invalidate_mapping(struct inode *inode); 73 72 extern int cifs_revalidate_mapping(struct inode *inode); 74 73 extern int cifs_zap_mapping(struct inode *inode); 75 74 extern int cifs_getattr(struct mnt_idmap *, const struct path *, ··· 84 85 85 86 86 87 /* Functions related to files and directories */ 88 + extern const struct netfs_request_ops cifs_req_ops; 87 89 extern const struct file_operations cifs_file_ops; 88 90 extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ 89 91 extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */ ··· 94 94 extern int cifs_open(struct inode *inode, struct file *file); 95 95 extern int cifs_close(struct inode *inode, struct file *file); 96 96 extern int cifs_closedir(struct inode *inode, struct file *file); 97 - extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); 98 - extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); 99 97 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); 100 - extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); 101 - extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from); 102 98 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); 99 + ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); 100 + ssize_t cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter); 103 101 extern int cifs_flock(struct file *pfile, int cmd, struct file_lock *plock); 104 102 extern int cifs_lock(struct file *, int, struct file_lock *); 105 103 extern int cifs_fsync(struct file *, loff_t, loff_t, int); ··· 108 110 extern const struct file_operations cifs_dir_ops; 109 111 extern int cifs_dir_open(struct inode *inode, struct 
file *file); 110 112 extern int cifs_readdir(struct file *file, struct dir_context *ctx); 111 - extern void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len); 112 - extern void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len); 113 - extern void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len); 114 113 115 114 /* Functions related to dir entries */ 116 115 extern const struct dentry_operations cifs_dentry_ops;
+23 -42
fs/smb/client/cifsglob.h
··· 268 268 struct cifs_fattr; 269 269 struct smb3_fs_context; 270 270 struct cifs_fid; 271 - struct cifs_readdata; 272 - struct cifs_writedata; 271 + struct cifs_io_subrequest; 273 272 struct cifs_io_parms; 274 273 struct cifs_search_info; 275 274 struct cifsInodeInfo; ··· 449 450 /* send a flush request to the server */ 450 451 int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); 451 452 /* async read from the server */ 452 - int (*async_readv)(struct cifs_readdata *); 453 + int (*async_readv)(struct cifs_io_subrequest *); 453 454 /* async write to the server */ 454 - int (*async_writev)(struct cifs_writedata *, 455 - void (*release)(struct kref *)); 455 + void (*async_writev)(struct cifs_io_subrequest *); 456 456 /* sync read from the server */ 457 457 int (*sync_read)(const unsigned int, struct cifs_fid *, 458 458 struct cifs_io_parms *, unsigned int *, char **, ··· 546 548 /* writepages retry size */ 547 549 unsigned int (*wp_retry_size)(struct inode *); 548 550 /* get mtu credits */ 549 - int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, 550 - unsigned int *, struct cifs_credits *); 551 + int (*wait_mtu_credits)(struct TCP_Server_Info *, size_t, 552 + size_t *, struct cifs_credits *); 551 553 /* adjust previously taken mtu credits to request size */ 552 554 int (*adjust_credits)(struct TCP_Server_Info *server, 553 555 struct cifs_credits *credits, ··· 881 883 882 884 static inline void 883 885 add_credits_and_wake_if(struct TCP_Server_Info *server, 884 - const struct cifs_credits *credits, const int optype) 886 + struct cifs_credits *credits, const int optype) 885 887 { 886 888 if (credits->value) { 887 889 server->ops->add_credits(server, credits, optype); 888 890 wake_up(&server->request_q); 891 + credits->value = 0; 889 892 } 890 893 } 891 894 ··· 1491 1492 bool direct_io; 1492 1493 }; 1493 1494 1494 - /* asynchronous read support */ 1495 - struct cifs_readdata { 1496 - struct kref refcount; 1497 - struct list_head list; 
1498 - struct completion done; 1495 + struct cifs_io_request { 1496 + struct netfs_io_request rreq; 1499 1497 struct cifsFileInfo *cfile; 1500 - struct address_space *mapping; 1501 - struct cifs_aio_ctx *ctx; 1502 - __u64 offset; 1498 + }; 1499 + 1500 + /* asynchronous read support */ 1501 + struct cifs_io_subrequest { 1502 + union { 1503 + struct netfs_io_subrequest subreq; 1504 + struct netfs_io_request *rreq; 1505 + struct cifs_io_request *req; 1506 + }; 1503 1507 ssize_t got_bytes; 1504 - unsigned int bytes; 1505 1508 pid_t pid; 1509 + unsigned int xid; 1506 1510 int result; 1507 - struct work_struct work; 1508 - struct iov_iter iter; 1511 + bool have_xid; 1512 + bool replay; 1509 1513 struct kvec iov[2]; 1510 1514 struct TCP_Server_Info *server; 1511 1515 #ifdef CONFIG_CIFS_SMB_DIRECT 1512 1516 struct smbd_mr *mr; 1513 1517 #endif 1514 1518 struct cifs_credits credits; 1515 - }; 1516 - 1517 - /* asynchronous write support */ 1518 - struct cifs_writedata { 1519 - struct kref refcount; 1520 - struct list_head list; 1521 - struct completion done; 1522 - enum writeback_sync_modes sync_mode; 1523 - struct work_struct work; 1524 - struct cifsFileInfo *cfile; 1525 - struct cifs_aio_ctx *ctx; 1526 - struct iov_iter iter; 1527 - struct bio_vec *bv; 1528 - __u64 offset; 1529 - pid_t pid; 1530 - unsigned int bytes; 1531 - int result; 1532 - struct TCP_Server_Info *server; 1533 - #ifdef CONFIG_CIFS_SMB_DIRECT 1534 - struct smbd_mr *mr; 1535 - #endif 1536 - struct cifs_credits credits; 1537 - bool replay; 1538 1519 }; 1539 1520 1540 1521 /* ··· 2094 2115 extern mempool_t *cifs_sm_req_poolp; 2095 2116 extern mempool_t *cifs_req_poolp; 2096 2117 extern mempool_t *cifs_mid_poolp; 2118 + extern mempool_t cifs_io_request_pool; 2119 + extern mempool_t cifs_io_subrequest_pool; 2097 2120 2098 2121 /* Operations for different SMB versions */ 2099 2122 #define SMB1_VERSION_STRING "1.0"
+5 -7
fs/smb/client/cifsproto.h
··· 121 121 extern int cifs_check_receive(struct mid_q_entry *mid, 122 122 struct TCP_Server_Info *server, bool log_error); 123 123 extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, 124 - unsigned int size, unsigned int *num, 124 + size_t size, size_t *num, 125 125 struct cifs_credits *credits); 126 126 extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 127 127 struct kvec *, int /* nvec to send */, ··· 148 148 bool from_readdir); 149 149 extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 150 150 unsigned int bytes_written); 151 + void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, 152 + bool was_async); 151 153 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); 152 154 extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, 153 155 int flags, ··· 601 599 extern struct cifs_ses * 602 600 cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx); 603 601 604 - void cifs_readdata_release(struct kref *refcount); 605 - int cifs_async_readv(struct cifs_readdata *rdata); 602 + int cifs_async_readv(struct cifs_io_subrequest *rdata); 606 603 int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); 607 604 608 - int cifs_async_writev(struct cifs_writedata *wdata, 609 - void (*release)(struct kref *kref)); 605 + void cifs_async_writev(struct cifs_io_subrequest *wdata); 610 606 void cifs_writev_complete(struct work_struct *work); 611 - struct cifs_writedata *cifs_writedata_alloc(work_func_t complete); 612 - void cifs_writedata_release(struct kref *refcount); 613 607 int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, 614 608 struct cifs_sb_info *cifs_sb, 615 609 const unsigned char *path, char *pbuf,
+63 -57
fs/smb/client/cifssmb.c
··· 24 24 #include <linux/swap.h> 25 25 #include <linux/task_io_accounting_ops.h> 26 26 #include <linux/uaccess.h> 27 + #include <linux/netfs.h> 28 + #include <trace/events/netfs.h> 27 29 #include "cifspdu.h" 28 30 #include "cifsfs.h" 29 31 #include "cifsglob.h" ··· 1264 1262 static void 1265 1263 cifs_readv_callback(struct mid_q_entry *mid) 1266 1264 { 1267 - struct cifs_readdata *rdata = mid->callback_data; 1268 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1265 + struct cifs_io_subrequest *rdata = mid->callback_data; 1266 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 1269 1267 struct TCP_Server_Info *server = tcon->ses->server; 1270 1268 struct smb_rqst rqst = { .rq_iov = rdata->iov, 1271 1269 .rq_nvec = 2, 1272 - .rq_iter_size = iov_iter_count(&rdata->iter), 1273 - .rq_iter = rdata->iter }; 1270 + .rq_iter = rdata->subreq.io_iter }; 1274 1271 struct cifs_credits credits = { .value = 1, .instance = 0 }; 1275 1272 1276 - cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", 1273 + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", 1277 1274 __func__, mid->mid, mid->mid_state, rdata->result, 1278 - rdata->bytes); 1275 + rdata->subreq.len); 1279 1276 1280 1277 switch (mid->mid_state) { 1281 1278 case MID_RESPONSE_RECEIVED: ··· 1306 1305 rdata->result = -EIO; 1307 1306 } 1308 1307 1309 - queue_work(cifsiod_wq, &rdata->work); 1308 + if (rdata->result == 0 || rdata->result == -EAGAIN) 1309 + iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes); 1310 + rdata->credits.value = 0; 1311 + netfs_subreq_terminated(&rdata->subreq, 1312 + (rdata->result == 0 || rdata->result == -EAGAIN) ? 
1313 + rdata->got_bytes : rdata->result, 1314 + false); 1310 1315 release_mid(mid); 1311 1316 add_credits(server, &credits, 0); 1312 1317 } 1313 1318 1314 1319 /* cifs_async_readv - send an async write, and set up mid to handle result */ 1315 1320 int 1316 - cifs_async_readv(struct cifs_readdata *rdata) 1321 + cifs_async_readv(struct cifs_io_subrequest *rdata) 1317 1322 { 1318 1323 int rc; 1319 1324 READ_REQ *smb = NULL; 1320 1325 int wct; 1321 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1326 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 1322 1327 struct smb_rqst rqst = { .rq_iov = rdata->iov, 1323 1328 .rq_nvec = 2 }; 1324 1329 1325 - cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", 1326 - __func__, rdata->offset, rdata->bytes); 1330 + cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n", 1331 + __func__, rdata->subreq.start, rdata->subreq.len); 1327 1332 1328 1333 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1329 1334 wct = 12; 1330 1335 else { 1331 1336 wct = 10; /* old style read */ 1332 - if ((rdata->offset >> 32) > 0) { 1337 + if ((rdata->subreq.start >> 32) > 0) { 1333 1338 /* can not handle this big offset for old */ 1334 1339 return -EIO; 1335 1340 } ··· 1349 1342 smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); 1350 1343 1351 1344 smb->AndXCommand = 0xFF; /* none */ 1352 - smb->Fid = rdata->cfile->fid.netfid; 1353 - smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); 1345 + smb->Fid = rdata->req->cfile->fid.netfid; 1346 + smb->OffsetLow = cpu_to_le32(rdata->subreq.start & 0xFFFFFFFF); 1354 1347 if (wct == 12) 1355 - smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); 1348 + smb->OffsetHigh = cpu_to_le32(rdata->subreq.start >> 32); 1356 1349 smb->Remaining = 0; 1357 - smb->MaxCount = cpu_to_le16(rdata->bytes & 0xFFFF); 1358 - smb->MaxCountHigh = cpu_to_le32(rdata->bytes >> 16); 1350 + smb->MaxCount = cpu_to_le16(rdata->subreq.len & 0xFFFF); 1351 + smb->MaxCountHigh = cpu_to_le32(rdata->subreq.len >> 16); 1359 1352 if 
(wct == 12) 1360 1353 smb->ByteCount = 0; 1361 1354 else { ··· 1371 1364 rdata->iov[1].iov_base = (char *)smb + 4; 1372 1365 rdata->iov[1].iov_len = get_rfc1002_length(smb); 1373 1366 1374 - kref_get(&rdata->refcount); 1375 1367 rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive, 1376 1368 cifs_readv_callback, NULL, rdata, 0, NULL); 1377 1369 1378 1370 if (rc == 0) 1379 1371 cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); 1380 - else 1381 - kref_put(&rdata->refcount, cifs_readdata_release); 1382 - 1383 1372 cifs_small_buf_release(smb); 1384 1373 return rc; 1385 1374 } ··· 1618 1615 static void 1619 1616 cifs_writev_callback(struct mid_q_entry *mid) 1620 1617 { 1621 - struct cifs_writedata *wdata = mid->callback_data; 1622 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 1623 - unsigned int written; 1618 + struct cifs_io_subrequest *wdata = mid->callback_data; 1619 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 1624 1620 WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; 1625 1621 struct cifs_credits credits = { .value = 1, .instance = 0 }; 1622 + ssize_t result; 1623 + size_t written; 1626 1624 1627 1625 switch (mid->mid_state) { 1628 1626 case MID_RESPONSE_RECEIVED: 1629 - wdata->result = cifs_check_receive(mid, tcon->ses->server, 0); 1630 - if (wdata->result != 0) 1627 + result = cifs_check_receive(mid, tcon->ses->server, 0); 1628 + if (result != 0) 1631 1629 break; 1632 1630 1633 1631 written = le16_to_cpu(smb->CountHigh); ··· 1640 1636 * client. OS/2 servers are known to set incorrect 1641 1637 * CountHigh values. 
1642 1638 */ 1643 - if (written > wdata->bytes) 1639 + if (written > wdata->subreq.len) 1644 1640 written &= 0xFFFF; 1645 1641 1646 - if (written < wdata->bytes) 1647 - wdata->result = -ENOSPC; 1642 + if (written < wdata->subreq.len) 1643 + result = -ENOSPC; 1648 1644 else 1649 - wdata->bytes = written; 1645 + result = written; 1650 1646 break; 1651 1647 case MID_REQUEST_SUBMITTED: 1652 1648 case MID_RETRY_NEEDED: 1653 - wdata->result = -EAGAIN; 1649 + result = -EAGAIN; 1654 1650 break; 1655 1651 default: 1656 - wdata->result = -EIO; 1652 + result = -EIO; 1657 1653 break; 1658 1654 } 1659 1655 1660 - queue_work(cifsiod_wq, &wdata->work); 1656 + wdata->credits.value = 0; 1657 + cifs_write_subrequest_terminated(wdata, result, true); 1661 1658 release_mid(mid); 1662 1659 add_credits(tcon->ses->server, &credits, 0); 1663 1660 } 1664 1661 1665 1662 /* cifs_async_writev - send an async write, and set up mid to handle result */ 1666 - int 1667 - cifs_async_writev(struct cifs_writedata *wdata, 1668 - void (*release)(struct kref *kref)) 1663 + void 1664 + cifs_async_writev(struct cifs_io_subrequest *wdata) 1669 1665 { 1670 1666 int rc = -EACCES; 1671 1667 WRITE_REQ *smb = NULL; 1672 1668 int wct; 1673 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 1669 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 1674 1670 struct kvec iov[2]; 1675 1671 struct smb_rqst rqst = { }; 1676 1672 ··· 1678 1674 wct = 14; 1679 1675 } else { 1680 1676 wct = 12; 1681 - if (wdata->offset >> 32 > 0) { 1677 + if (wdata->subreq.start >> 32 > 0) { 1682 1678 /* can not handle big offset for old srv */ 1683 - return -EIO; 1679 + rc = -EIO; 1680 + goto out; 1684 1681 } 1685 1682 } 1686 1683 ··· 1693 1688 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); 1694 1689 1695 1690 smb->AndXCommand = 0xFF; /* none */ 1696 - smb->Fid = wdata->cfile->fid.netfid; 1697 - smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); 1691 + smb->Fid = wdata->req->cfile->fid.netfid; 1692 
+ smb->OffsetLow = cpu_to_le32(wdata->subreq.start & 0xFFFFFFFF); 1698 1693 if (wct == 14) 1699 - smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); 1694 + smb->OffsetHigh = cpu_to_le32(wdata->subreq.start >> 32); 1700 1695 smb->Reserved = 0xFFFFFFFF; 1701 1696 smb->WriteMode = 0; 1702 1697 smb->Remaining = 0; ··· 1712 1707 1713 1708 rqst.rq_iov = iov; 1714 1709 rqst.rq_nvec = 2; 1715 - rqst.rq_iter = wdata->iter; 1716 - rqst.rq_iter_size = iov_iter_count(&wdata->iter); 1710 + rqst.rq_iter = wdata->subreq.io_iter; 1711 + rqst.rq_iter_size = iov_iter_count(&wdata->subreq.io_iter); 1717 1712 1718 - cifs_dbg(FYI, "async write at %llu %u bytes\n", 1719 - wdata->offset, wdata->bytes); 1713 + cifs_dbg(FYI, "async write at %llu %zu bytes\n", 1714 + wdata->subreq.start, wdata->subreq.len); 1720 1715 1721 - smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF); 1722 - smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16); 1716 + smb->DataLengthLow = cpu_to_le16(wdata->subreq.len & 0xFFFF); 1717 + smb->DataLengthHigh = cpu_to_le16(wdata->subreq.len >> 16); 1723 1718 1724 1719 if (wct == 14) { 1725 - inc_rfc1001_len(&smb->hdr, wdata->bytes + 1); 1726 - put_bcc(wdata->bytes + 1, &smb->hdr); 1720 + inc_rfc1001_len(&smb->hdr, wdata->subreq.len + 1); 1721 + put_bcc(wdata->subreq.len + 1, &smb->hdr); 1727 1722 } else { 1728 1723 /* wct == 12 */ 1729 1724 struct smb_com_writex_req *smbw = 1730 1725 (struct smb_com_writex_req *)smb; 1731 - inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); 1732 - put_bcc(wdata->bytes + 5, &smbw->hdr); 1726 + inc_rfc1001_len(&smbw->hdr, wdata->subreq.len + 5); 1727 + put_bcc(wdata->subreq.len + 5, &smbw->hdr); 1733 1728 iov[1].iov_len += 4; /* pad bigger by four bytes */ 1734 1729 } 1735 1730 1736 - kref_get(&wdata->refcount); 1737 1731 rc = cifs_call_async(tcon->ses->server, &rqst, NULL, 1738 1732 cifs_writev_callback, NULL, wdata, 0, NULL); 1739 - 1733 + /* Can't touch wdata if rc == 0 */ 1740 1734 if (rc == 0) 1741 1735 
cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); 1742 - else 1743 - kref_put(&wdata->refcount, release); 1744 1736 1745 1737 async_writev_out: 1746 1738 cifs_small_buf_release(smb); 1747 - return rc; 1739 + out: 1740 + if (rc) { 1741 + add_credits_and_wake_if(wdata->server, &wdata->credits, 0); 1742 + cifs_write_subrequest_terminated(wdata, rc, false); 1743 + } 1748 1744 } 1749 1745 1750 1746 int
+370 -2384
fs/smb/client/file.c
··· 36 36 #include "fs_context.h" 37 37 #include "cifs_ioctl.h" 38 38 #include "cached_dir.h" 39 + #include <trace/events/netfs.h> 40 + 41 + static int cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush); 39 42 40 43 /* 41 - * Remove the dirty flags from a span of pages. 44 + * Prepare a subrequest to upload to the server. We need to allocate credits 45 + * so that we know the maximum amount of data that we can include in it. 42 46 */ 43 - static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) 47 + static void cifs_prepare_write(struct netfs_io_subrequest *subreq) 44 48 { 45 - struct address_space *mapping = inode->i_mapping; 46 - struct folio *folio; 47 - pgoff_t end; 49 + struct cifs_io_subrequest *wdata = 50 + container_of(subreq, struct cifs_io_subrequest, subreq); 51 + struct cifs_io_request *req = wdata->req; 52 + struct TCP_Server_Info *server; 53 + struct cifsFileInfo *open_file = req->cfile; 54 + size_t wsize = req->rreq.wsize; 55 + int rc; 48 56 49 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 50 - 51 - rcu_read_lock(); 52 - 53 - end = (start + len - 1) / PAGE_SIZE; 54 - xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { 55 - if (xas_retry(&xas, folio)) 56 - continue; 57 - xas_pause(&xas); 58 - rcu_read_unlock(); 59 - folio_lock(folio); 60 - folio_clear_dirty_for_io(folio); 61 - folio_unlock(folio); 62 - rcu_read_lock(); 57 + if (!wdata->have_xid) { 58 + wdata->xid = get_xid(); 59 + wdata->have_xid = true; 63 60 } 64 61 65 - rcu_read_unlock(); 62 + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 63 + wdata->server = server; 64 + 65 + retry: 66 + if (open_file->invalidHandle) { 67 + rc = cifs_reopen_file(open_file, false); 68 + if (rc < 0) { 69 + if (rc == -EAGAIN) 70 + goto retry; 71 + subreq->error = rc; 72 + return netfs_prepare_write_failed(subreq); 73 + } 74 + } 75 + 76 + rc = server->ops->wait_mtu_credits(server, wsize, &wdata->subreq.max_len, 77 + &wdata->credits); 78 + if (rc 
< 0) { 79 + subreq->error = rc; 80 + return netfs_prepare_write_failed(subreq); 81 + } 82 + 83 + #ifdef CONFIG_CIFS_SMB_DIRECT 84 + if (server->smbd_conn) 85 + subreq->max_nr_segs = server->smbd_conn->max_frmr_depth; 86 + #endif 66 87 } 67 88 68 89 /* 69 - * Completion of write to server. 90 + * Issue a subrequest to upload to the server. 70 91 */ 71 - void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) 92 + static void cifs_issue_write(struct netfs_io_subrequest *subreq) 72 93 { 73 - struct address_space *mapping = inode->i_mapping; 74 - struct folio *folio; 75 - pgoff_t end; 94 + struct cifs_io_subrequest *wdata = 95 + container_of(subreq, struct cifs_io_subrequest, subreq); 96 + struct cifs_sb_info *sbi = CIFS_SB(subreq->rreq->inode->i_sb); 97 + int rc; 76 98 77 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 99 + if (cifs_forced_shutdown(sbi)) { 100 + rc = -EIO; 101 + goto fail; 102 + } 78 103 79 - if (!len) 104 + rc = adjust_credits(wdata->server, &wdata->credits, wdata->subreq.len); 105 + if (rc) 106 + goto fail; 107 + 108 + rc = -EAGAIN; 109 + if (wdata->req->cfile->invalidHandle) 110 + goto fail; 111 + 112 + wdata->server->ops->async_writev(wdata); 113 + out: 114 + return; 115 + 116 + fail: 117 + if (rc == -EAGAIN) 118 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 119 + else 120 + trace_netfs_sreq(subreq, netfs_sreq_trace_fail); 121 + add_credits_and_wake_if(wdata->server, &wdata->credits, 0); 122 + cifs_write_subrequest_terminated(wdata, rc, false); 123 + goto out; 124 + } 125 + 126 + /* 127 + * Split the read up according to how many credits we can get for each piece. 128 + * It's okay to sleep here if we need to wait for more credit to become 129 + * available. 130 + * 131 + * We also choose the server and allocate an operation ID to be cleaned up 132 + * later. 
133 + */ 134 + static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) 135 + { 136 + struct netfs_io_request *rreq = subreq->rreq; 137 + struct TCP_Server_Info *server; 138 + struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); 139 + struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); 140 + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 141 + size_t rsize = 0; 142 + int rc; 143 + 144 + rdata->xid = get_xid(); 145 + rdata->have_xid = true; 146 + 147 + server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); 148 + rdata->server = server; 149 + 150 + if (cifs_sb->ctx->rsize == 0) 151 + cifs_sb->ctx->rsize = 152 + server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), 153 + cifs_sb->ctx); 154 + 155 + 156 + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, 157 + &rdata->credits); 158 + if (rc) { 159 + subreq->error = rc; 160 + return false; 161 + } 162 + 163 + subreq->len = min_t(size_t, subreq->len, rsize); 164 + #ifdef CONFIG_CIFS_SMB_DIRECT 165 + if (server->smbd_conn) 166 + subreq->max_nr_segs = server->smbd_conn->max_frmr_depth; 167 + #endif 168 + return true; 169 + } 170 + 171 + /* 172 + * Issue a read operation on behalf of the netfs helper functions. We're asked 173 + * to make a read of a certain size at a point in the file. We are permitted 174 + * to only read a portion of that, but as long as we read something, the netfs 175 + * helper will call us again so that we can issue another read. 
176 + */ 177 + static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) 178 + { 179 + struct netfs_io_request *rreq = subreq->rreq; 180 + struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); 181 + struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); 182 + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 183 + pid_t pid; 184 + int rc = 0; 185 + 186 + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 187 + pid = req->cfile->pid; 188 + else 189 + pid = current->tgid; // Ummm... This may be a workqueue 190 + 191 + cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", 192 + __func__, rreq->debug_id, subreq->debug_index, rreq->mapping, 193 + subreq->transferred, subreq->len); 194 + 195 + if (req->cfile->invalidHandle) { 196 + do { 197 + rc = cifs_reopen_file(req->cfile, true); 198 + } while (rc == -EAGAIN); 199 + if (rc) 200 + goto out; 201 + } 202 + 203 + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 204 + rdata->pid = pid; 205 + 206 + rc = adjust_credits(rdata->server, &rdata->credits, rdata->subreq.len); 207 + if (!rc) { 208 + if (rdata->req->cfile->invalidHandle) 209 + rc = -EAGAIN; 210 + else 211 + rc = rdata->server->ops->async_readv(rdata); 212 + } 213 + 214 + out: 215 + if (rc) 216 + netfs_subreq_terminated(subreq, rc, false); 217 + } 218 + 219 + /* 220 + * Writeback calls this when it finds a folio that needs uploading. This isn't 221 + * called if writeback only has copy-to-cache to deal with. 
222 + */ 223 + static void cifs_begin_writeback(struct netfs_io_request *wreq) 224 + { 225 + struct cifs_io_request *req = container_of(wreq, struct cifs_io_request, rreq); 226 + int ret; 227 + 228 + ret = cifs_get_writable_file(CIFS_I(wreq->inode), FIND_WR_ANY, &req->cfile); 229 + if (ret) { 230 + cifs_dbg(VFS, "No writable handle in writepages ret=%d\n", ret); 231 + return; 232 + } 233 + 234 + wreq->io_streams[0].avail = true; 235 + } 236 + 237 + /* 238 + * Initialise a request. 239 + */ 240 + static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) 241 + { 242 + struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq); 243 + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 244 + struct cifsFileInfo *open_file = NULL; 245 + 246 + rreq->rsize = cifs_sb->ctx->rsize; 247 + rreq->wsize = cifs_sb->ctx->wsize; 248 + 249 + if (file) { 250 + open_file = file->private_data; 251 + rreq->netfs_priv = file->private_data; 252 + req->cfile = cifsFileInfo_get(open_file); 253 + } else if (rreq->origin != NETFS_WRITEBACK) { 254 + WARN_ON_ONCE(1); 255 + return -EIO; 256 + } 257 + 258 + return 0; 259 + } 260 + 261 + /* 262 + * Expand the size of a readahead to the size of the rsize, if at least as 263 + * large as a page, allowing for the possibility that rsize is not pow-2 264 + * aligned. 
265 + */ 266 + static void cifs_expand_readahead(struct netfs_io_request *rreq) 267 + { 268 + unsigned int rsize = rreq->rsize; 269 + loff_t misalignment, i_size = i_size_read(rreq->inode); 270 + 271 + if (rsize < PAGE_SIZE) 80 272 return; 81 273 82 - rcu_read_lock(); 274 + if (rsize < INT_MAX) 275 + rsize = roundup_pow_of_two(rsize); 276 + else 277 + rsize = ((unsigned int)INT_MAX + 1) / 2; 83 278 84 - end = (start + len - 1) / PAGE_SIZE; 85 - xas_for_each(&xas, folio, end) { 86 - if (xas_retry(&xas, folio)) 87 - continue; 88 - if (!folio_test_writeback(folio)) { 89 - WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 90 - len, start, folio->index, end); 91 - continue; 92 - } 93 - 94 - folio_detach_private(folio); 95 - folio_end_writeback(folio); 279 + misalignment = rreq->start & (rsize - 1); 280 + if (misalignment) { 281 + rreq->start -= misalignment; 282 + rreq->len += misalignment; 96 283 } 97 284 98 - rcu_read_unlock(); 285 + rreq->len = round_up(rreq->len, rsize); 286 + if (rreq->start < i_size && rreq->len > i_size - rreq->start) 287 + rreq->len = i_size - rreq->start; 99 288 } 100 289 101 290 /* 102 - * Failure of write to server. 291 + * Completion of a request operation. 
103 292 */ 104 - void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len) 293 + static void cifs_rreq_done(struct netfs_io_request *rreq) 105 294 { 106 - struct address_space *mapping = inode->i_mapping; 107 - struct folio *folio; 108 - pgoff_t end; 295 + struct timespec64 atime, mtime; 296 + struct inode *inode = rreq->inode; 109 297 110 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 111 - 112 - if (!len) 113 - return; 114 - 115 - rcu_read_lock(); 116 - 117 - end = (start + len - 1) / PAGE_SIZE; 118 - xas_for_each(&xas, folio, end) { 119 - if (xas_retry(&xas, folio)) 120 - continue; 121 - if (!folio_test_writeback(folio)) { 122 - WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 123 - len, start, folio->index, end); 124 - continue; 125 - } 126 - 127 - folio_set_error(folio); 128 - folio_end_writeback(folio); 129 - } 130 - 131 - rcu_read_unlock(); 298 + /* we do not want atime to be less than mtime, it broke some apps */ 299 + atime = inode_set_atime_to_ts(inode, current_time(inode)); 300 + mtime = inode_get_mtime(inode); 301 + if (timespec64_compare(&atime, &mtime)) 302 + inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 132 303 } 133 304 134 - /* 135 - * Redirty pages after a temporary failure. 
136 - */ 137 - void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) 305 + static void cifs_post_modify(struct inode *inode) 138 306 { 139 - struct address_space *mapping = inode->i_mapping; 140 - struct folio *folio; 141 - pgoff_t end; 307 + /* Indication to update ctime and mtime as close is deferred */ 308 + set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 309 + } 142 310 143 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 311 + static void cifs_free_request(struct netfs_io_request *rreq) 312 + { 313 + struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq); 144 314 145 - if (!len) 146 - return; 315 + if (req->cfile) 316 + cifsFileInfo_put(req->cfile); 317 + } 147 318 148 - rcu_read_lock(); 319 + static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) 320 + { 321 + struct cifs_io_subrequest *rdata = 322 + container_of(subreq, struct cifs_io_subrequest, subreq); 323 + int rc = subreq->error; 149 324 150 - end = (start + len - 1) / PAGE_SIZE; 151 - xas_for_each(&xas, folio, end) { 152 - if (!folio_test_writeback(folio)) { 153 - WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 154 - len, start, folio->index, end); 155 - continue; 325 + if (rdata->subreq.source == NETFS_DOWNLOAD_FROM_SERVER) { 326 + #ifdef CONFIG_CIFS_SMB_DIRECT 327 + if (rdata->mr) { 328 + smbd_deregister_mr(rdata->mr); 329 + rdata->mr = NULL; 156 330 } 157 - 158 - filemap_dirty_folio(folio->mapping, folio); 159 - folio_end_writeback(folio); 331 + #endif 160 332 } 161 333 162 - rcu_read_unlock(); 334 + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); 335 + if (rdata->have_xid) 336 + free_xid(rdata->xid); 163 337 } 338 + 339 + const struct netfs_request_ops cifs_req_ops = { 340 + .request_pool = &cifs_io_request_pool, 341 + .subrequest_pool = &cifs_io_subrequest_pool, 342 + .init_request = cifs_init_request, 343 + .free_request = cifs_free_request, 344 + .free_subrequest = cifs_free_subrequest, 345 + 
.expand_readahead = cifs_expand_readahead, 346 + .clamp_length = cifs_clamp_length, 347 + .issue_read = cifs_req_issue_read, 348 + .done = cifs_rreq_done, 349 + .post_modify = cifs_post_modify, 350 + .begin_writeback = cifs_begin_writeback, 351 + .prepare_write = cifs_prepare_write, 352 + .issue_write = cifs_issue_write, 353 + }; 164 354 165 355 /* 166 356 * Mark as invalid, all open files on tree connections since they ··· 2397 2207 return rc; 2398 2208 } 2399 2209 2400 - /* 2401 - * update the file size (if needed) after a write. Should be called with 2402 - * the inode->i_lock held 2403 - */ 2404 - void 2405 - cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2406 - unsigned int bytes_written) 2210 + void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, 2211 + bool was_async) 2407 2212 { 2408 - loff_t end_of_write = offset + bytes_written; 2213 + struct netfs_io_request *wreq = wdata->rreq; 2214 + loff_t new_server_eof; 2409 2215 2410 - if (end_of_write > cifsi->netfs.remote_i_size) 2411 - netfs_resize_file(&cifsi->netfs, end_of_write, true); 2412 - } 2216 + if (result > 0) { 2217 + new_server_eof = wdata->subreq.start + wdata->subreq.transferred + result; 2413 2218 2414 - static ssize_t 2415 - cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2416 - size_t write_size, loff_t *offset) 2417 - { 2418 - int rc = 0; 2419 - unsigned int bytes_written = 0; 2420 - unsigned int total_written; 2421 - struct cifs_tcon *tcon; 2422 - struct TCP_Server_Info *server; 2423 - unsigned int xid; 2424 - struct dentry *dentry = open_file->dentry; 2425 - struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2426 - struct cifs_io_parms io_parms = {0}; 2427 - 2428 - cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2429 - write_size, *offset, dentry); 2430 - 2431 - tcon = tlink_tcon(open_file->tlink); 2432 - server = tcon->ses->server; 2433 - 2434 - if (!server->ops->sync_write) 2435 - return -ENOSYS; 2436 
- 2437 - xid = get_xid(); 2438 - 2439 - for (total_written = 0; write_size > total_written; 2440 - total_written += bytes_written) { 2441 - rc = -EAGAIN; 2442 - while (rc == -EAGAIN) { 2443 - struct kvec iov[2]; 2444 - unsigned int len; 2445 - 2446 - if (open_file->invalidHandle) { 2447 - /* we could deadlock if we called 2448 - filemap_fdatawait from here so tell 2449 - reopen_file not to flush data to 2450 - server now */ 2451 - rc = cifs_reopen_file(open_file, false); 2452 - if (rc != 0) 2453 - break; 2454 - } 2455 - 2456 - len = min(server->ops->wp_retry_size(d_inode(dentry)), 2457 - (unsigned int)write_size - total_written); 2458 - /* iov[0] is reserved for smb header */ 2459 - iov[1].iov_base = (char *)write_data + total_written; 2460 - iov[1].iov_len = len; 2461 - io_parms.pid = pid; 2462 - io_parms.tcon = tcon; 2463 - io_parms.offset = *offset; 2464 - io_parms.length = len; 2465 - rc = server->ops->sync_write(xid, &open_file->fid, 2466 - &io_parms, &bytes_written, iov, 1); 2467 - } 2468 - if (rc || (bytes_written == 0)) { 2469 - if (total_written) 2470 - break; 2471 - else { 2472 - free_xid(xid); 2473 - return rc; 2474 - } 2475 - } else { 2476 - spin_lock(&d_inode(dentry)->i_lock); 2477 - cifs_update_eof(cifsi, *offset, bytes_written); 2478 - spin_unlock(&d_inode(dentry)->i_lock); 2479 - *offset += bytes_written; 2480 - } 2219 + if (new_server_eof > netfs_inode(wreq->inode)->remote_i_size) 2220 + netfs_resize_file(netfs_inode(wreq->inode), new_server_eof, true); 2481 2221 } 2482 2222 2483 - cifs_stats_bytes_written(tcon, total_written); 2484 - 2485 - if (total_written > 0) { 2486 - spin_lock(&d_inode(dentry)->i_lock); 2487 - if (*offset > d_inode(dentry)->i_size) { 2488 - i_size_write(d_inode(dentry), *offset); 2489 - d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2490 - } 2491 - spin_unlock(&d_inode(dentry)->i_lock); 2492 - } 2493 - mark_inode_dirty_sync(d_inode(dentry)); 2494 - free_xid(xid); 2495 - return total_written; 2223 + 
netfs_write_subrequest_terminated(&wdata->subreq, result, was_async); 2496 2224 } 2497 2225 2498 2226 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, ··· 2617 2509 return -ENOENT; 2618 2510 } 2619 2511 2620 - void 2621 - cifs_writedata_release(struct kref *refcount) 2622 - { 2623 - struct cifs_writedata *wdata = container_of(refcount, 2624 - struct cifs_writedata, refcount); 2625 - #ifdef CONFIG_CIFS_SMB_DIRECT 2626 - if (wdata->mr) { 2627 - smbd_deregister_mr(wdata->mr); 2628 - wdata->mr = NULL; 2629 - } 2630 - #endif 2631 - 2632 - if (wdata->cfile) 2633 - cifsFileInfo_put(wdata->cfile); 2634 - 2635 - kfree(wdata); 2636 - } 2637 - 2638 2512 /* 2639 - * Write failed with a retryable error. Resend the write request. It's also 2640 - * possible that the page was redirtied so re-clean the page. 2513 + * Flush data on a strict file. 2641 2514 */ 2642 - static void 2643 - cifs_writev_requeue(struct cifs_writedata *wdata) 2644 - { 2645 - int rc = 0; 2646 - struct inode *inode = d_inode(wdata->cfile->dentry); 2647 - struct TCP_Server_Info *server; 2648 - unsigned int rest_len = wdata->bytes; 2649 - loff_t fpos = wdata->offset; 2650 - 2651 - server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2652 - do { 2653 - struct cifs_writedata *wdata2; 2654 - unsigned int wsize, cur_len; 2655 - 2656 - wsize = server->ops->wp_retry_size(inode); 2657 - if (wsize < rest_len) { 2658 - if (wsize < PAGE_SIZE) { 2659 - rc = -EOPNOTSUPP; 2660 - break; 2661 - } 2662 - cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2663 - } else { 2664 - cur_len = rest_len; 2665 - } 2666 - 2667 - wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2668 - if (!wdata2) { 2669 - rc = -ENOMEM; 2670 - break; 2671 - } 2672 - 2673 - wdata2->sync_mode = wdata->sync_mode; 2674 - wdata2->offset = fpos; 2675 - wdata2->bytes = cur_len; 2676 - wdata2->iter = wdata->iter; 2677 - 2678 - iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2679 - iov_iter_truncate(&wdata2->iter, 
wdata2->bytes); 2680 - 2681 - if (iov_iter_is_xarray(&wdata2->iter)) 2682 - /* Check for pages having been redirtied and clean 2683 - * them. We can do this by walking the xarray. If 2684 - * it's not an xarray, then it's a DIO and we shouldn't 2685 - * be mucking around with the page bits. 2686 - */ 2687 - cifs_undirty_folios(inode, fpos, cur_len); 2688 - 2689 - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2690 - &wdata2->cfile); 2691 - if (!wdata2->cfile) { 2692 - cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2693 - rc); 2694 - if (!is_retryable_error(rc)) 2695 - rc = -EBADF; 2696 - } else { 2697 - wdata2->pid = wdata2->cfile->pid; 2698 - rc = server->ops->async_writev(wdata2, 2699 - cifs_writedata_release); 2700 - } 2701 - 2702 - kref_put(&wdata2->refcount, cifs_writedata_release); 2703 - if (rc) { 2704 - if (is_retryable_error(rc)) 2705 - continue; 2706 - fpos += cur_len; 2707 - rest_len -= cur_len; 2708 - break; 2709 - } 2710 - 2711 - fpos += cur_len; 2712 - rest_len -= cur_len; 2713 - } while (rest_len > 0); 2714 - 2715 - /* Clean up remaining pages from the original wdata */ 2716 - if (iov_iter_is_xarray(&wdata->iter)) 2717 - cifs_pages_write_failed(inode, fpos, rest_len); 2718 - 2719 - if (rc != 0 && !is_retryable_error(rc)) 2720 - mapping_set_error(inode->i_mapping, rc); 2721 - kref_put(&wdata->refcount, cifs_writedata_release); 2722 - } 2723 - 2724 - void 2725 - cifs_writev_complete(struct work_struct *work) 2726 - { 2727 - struct cifs_writedata *wdata = container_of(work, 2728 - struct cifs_writedata, work); 2729 - struct inode *inode = d_inode(wdata->cfile->dentry); 2730 - 2731 - if (wdata->result == 0) { 2732 - spin_lock(&inode->i_lock); 2733 - cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2734 - spin_unlock(&inode->i_lock); 2735 - cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2736 - wdata->bytes); 2737 - } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2738 - return 
cifs_writev_requeue(wdata); 2739 - 2740 - if (wdata->result == -EAGAIN) 2741 - cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2742 - else if (wdata->result < 0) 2743 - cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2744 - else 2745 - cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2746 - 2747 - if (wdata->result != -EAGAIN) 2748 - mapping_set_error(inode->i_mapping, wdata->result); 2749 - kref_put(&wdata->refcount, cifs_writedata_release); 2750 - } 2751 - 2752 - struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2753 - { 2754 - struct cifs_writedata *wdata; 2755 - 2756 - wdata = kzalloc(sizeof(*wdata), GFP_NOFS); 2757 - if (wdata != NULL) { 2758 - kref_init(&wdata->refcount); 2759 - INIT_LIST_HEAD(&wdata->list); 2760 - init_completion(&wdata->done); 2761 - INIT_WORK(&wdata->work, complete); 2762 - } 2763 - return wdata; 2764 - } 2765 - 2766 - static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2767 - { 2768 - struct address_space *mapping = page->mapping; 2769 - loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2770 - char *write_data; 2771 - int rc = -EFAULT; 2772 - int bytes_written = 0; 2773 - struct inode *inode; 2774 - struct cifsFileInfo *open_file; 2775 - 2776 - if (!mapping || !mapping->host) 2777 - return -EFAULT; 2778 - 2779 - inode = page->mapping->host; 2780 - 2781 - offset += (loff_t)from; 2782 - write_data = kmap(page); 2783 - write_data += from; 2784 - 2785 - if ((to > PAGE_SIZE) || (from > to)) { 2786 - kunmap(page); 2787 - return -EIO; 2788 - } 2789 - 2790 - /* racing with truncate? 
*/ 2791 - if (offset > mapping->host->i_size) { 2792 - kunmap(page); 2793 - return 0; /* don't care */ 2794 - } 2795 - 2796 - /* check to make sure that we are not extending the file */ 2797 - if (mapping->host->i_size - offset < (loff_t)to) 2798 - to = (unsigned)(mapping->host->i_size - offset); 2799 - 2800 - rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2801 - &open_file); 2802 - if (!rc) { 2803 - bytes_written = cifs_write(open_file, open_file->pid, 2804 - write_data, to - from, &offset); 2805 - cifsFileInfo_put(open_file); 2806 - /* Does mm or vfs already set times? */ 2807 - simple_inode_init_ts(inode); 2808 - if ((bytes_written > 0) && (offset)) 2809 - rc = 0; 2810 - else if (bytes_written < 0) 2811 - rc = bytes_written; 2812 - else 2813 - rc = -EFAULT; 2814 - } else { 2815 - cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2816 - if (!is_retryable_error(rc)) 2817 - rc = -EIO; 2818 - } 2819 - 2820 - kunmap(page); 2821 - return rc; 2822 - } 2823 - 2824 - /* 2825 - * Extend the region to be written back to include subsequent contiguously 2826 - * dirty pages if possible, but don't sleep while doing so. 2827 - */ 2828 - static void cifs_extend_writeback(struct address_space *mapping, 2829 - struct xa_state *xas, 2830 - long *_count, 2831 - loff_t start, 2832 - int max_pages, 2833 - loff_t max_len, 2834 - size_t *_len) 2835 - { 2836 - struct folio_batch batch; 2837 - struct folio *folio; 2838 - unsigned int nr_pages; 2839 - pgoff_t index = (start + *_len) / PAGE_SIZE; 2840 - size_t len; 2841 - bool stop = true; 2842 - unsigned int i; 2843 - 2844 - folio_batch_init(&batch); 2845 - 2846 - do { 2847 - /* Firstly, we gather up a batch of contiguous dirty pages 2848 - * under the RCU read lock - but we can't clear the dirty flags 2849 - * there if any of those pages are mapped. 
2850 - */ 2851 - rcu_read_lock(); 2852 - 2853 - xas_for_each(xas, folio, ULONG_MAX) { 2854 - stop = true; 2855 - if (xas_retry(xas, folio)) 2856 - continue; 2857 - if (xa_is_value(folio)) 2858 - break; 2859 - if (folio->index != index) { 2860 - xas_reset(xas); 2861 - break; 2862 - } 2863 - 2864 - if (!folio_try_get_rcu(folio)) { 2865 - xas_reset(xas); 2866 - continue; 2867 - } 2868 - nr_pages = folio_nr_pages(folio); 2869 - if (nr_pages > max_pages) { 2870 - xas_reset(xas); 2871 - break; 2872 - } 2873 - 2874 - /* Has the page moved or been split? */ 2875 - if (unlikely(folio != xas_reload(xas))) { 2876 - folio_put(folio); 2877 - xas_reset(xas); 2878 - break; 2879 - } 2880 - 2881 - if (!folio_trylock(folio)) { 2882 - folio_put(folio); 2883 - xas_reset(xas); 2884 - break; 2885 - } 2886 - if (!folio_test_dirty(folio) || 2887 - folio_test_writeback(folio)) { 2888 - folio_unlock(folio); 2889 - folio_put(folio); 2890 - xas_reset(xas); 2891 - break; 2892 - } 2893 - 2894 - max_pages -= nr_pages; 2895 - len = folio_size(folio); 2896 - stop = false; 2897 - 2898 - index += nr_pages; 2899 - *_count -= nr_pages; 2900 - *_len += len; 2901 - if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2902 - stop = true; 2903 - 2904 - if (!folio_batch_add(&batch, folio)) 2905 - break; 2906 - if (stop) 2907 - break; 2908 - } 2909 - 2910 - xas_pause(xas); 2911 - rcu_read_unlock(); 2912 - 2913 - /* Now, if we obtained any pages, we can shift them to being 2914 - * writable and mark them for caching. 2915 - */ 2916 - if (!folio_batch_count(&batch)) 2917 - break; 2918 - 2919 - for (i = 0; i < folio_batch_count(&batch); i++) { 2920 - folio = batch.folios[i]; 2921 - /* The folio should be locked, dirty and not undergoing 2922 - * writeback from the loop above. 
2923 - */ 2924 - if (!folio_clear_dirty_for_io(folio)) 2925 - WARN_ON(1); 2926 - folio_start_writeback(folio); 2927 - folio_unlock(folio); 2928 - } 2929 - 2930 - folio_batch_release(&batch); 2931 - cond_resched(); 2932 - } while (!stop); 2933 - } 2934 - 2935 - /* 2936 - * Write back the locked page and any subsequent non-locked dirty pages. 2937 - */ 2938 - static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2939 - struct writeback_control *wbc, 2940 - struct xa_state *xas, 2941 - struct folio *folio, 2942 - unsigned long long start, 2943 - unsigned long long end) 2944 - { 2945 - struct inode *inode = mapping->host; 2946 - struct TCP_Server_Info *server; 2947 - struct cifs_writedata *wdata; 2948 - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2949 - struct cifs_credits credits_on_stack; 2950 - struct cifs_credits *credits = &credits_on_stack; 2951 - struct cifsFileInfo *cfile = NULL; 2952 - unsigned long long i_size = i_size_read(inode), max_len; 2953 - unsigned int xid, wsize; 2954 - size_t len = folio_size(folio); 2955 - long count = wbc->nr_to_write; 2956 - int rc; 2957 - 2958 - /* The folio should be locked, dirty and not undergoing writeback. 
*/ 2959 - if (!folio_clear_dirty_for_io(folio)) 2960 - WARN_ON_ONCE(1); 2961 - folio_start_writeback(folio); 2962 - 2963 - count -= folio_nr_pages(folio); 2964 - 2965 - xid = get_xid(); 2966 - server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2967 - 2968 - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2969 - if (rc) { 2970 - cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2971 - goto err_xid; 2972 - } 2973 - 2974 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2975 - &wsize, credits); 2976 - if (rc != 0) 2977 - goto err_close; 2978 - 2979 - wdata = cifs_writedata_alloc(cifs_writev_complete); 2980 - if (!wdata) { 2981 - rc = -ENOMEM; 2982 - goto err_uncredit; 2983 - } 2984 - 2985 - wdata->sync_mode = wbc->sync_mode; 2986 - wdata->offset = folio_pos(folio); 2987 - wdata->pid = cfile->pid; 2988 - wdata->credits = credits_on_stack; 2989 - wdata->cfile = cfile; 2990 - wdata->server = server; 2991 - cfile = NULL; 2992 - 2993 - /* Find all consecutive lockable dirty pages that have contiguous 2994 - * written regions, stopping when we find a page that is not 2995 - * immediately lockable, is not dirty or is missing, or we reach the 2996 - * end of the range. 2997 - */ 2998 - if (start < i_size) { 2999 - /* Trim the write to the EOF; the extra data is ignored. Also 3000 - * put an upper limit on the size of a single storedata op. 
3001 - */ 3002 - max_len = wsize; 3003 - max_len = min_t(unsigned long long, max_len, end - start + 1); 3004 - max_len = min_t(unsigned long long, max_len, i_size - start); 3005 - 3006 - if (len < max_len) { 3007 - int max_pages = INT_MAX; 3008 - 3009 - #ifdef CONFIG_CIFS_SMB_DIRECT 3010 - if (server->smbd_conn) 3011 - max_pages = server->smbd_conn->max_frmr_depth; 3012 - #endif 3013 - max_pages -= folio_nr_pages(folio); 3014 - 3015 - if (max_pages > 0) 3016 - cifs_extend_writeback(mapping, xas, &count, start, 3017 - max_pages, max_len, &len); 3018 - } 3019 - } 3020 - len = min_t(unsigned long long, len, i_size - start); 3021 - 3022 - /* We now have a contiguous set of dirty pages, each with writeback 3023 - * set; the first page is still locked at this point, but all the rest 3024 - * have been unlocked. 3025 - */ 3026 - folio_unlock(folio); 3027 - wdata->bytes = len; 3028 - 3029 - if (start < i_size) { 3030 - iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 3031 - start, len); 3032 - 3033 - rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 3034 - if (rc) 3035 - goto err_wdata; 3036 - 3037 - if (wdata->cfile->invalidHandle) 3038 - rc = -EAGAIN; 3039 - else 3040 - rc = wdata->server->ops->async_writev(wdata, 3041 - cifs_writedata_release); 3042 - if (rc >= 0) { 3043 - kref_put(&wdata->refcount, cifs_writedata_release); 3044 - goto err_close; 3045 - } 3046 - } else { 3047 - /* The dirty region was entirely beyond the EOF. 
*/ 3048 - cifs_pages_written_back(inode, start, len); 3049 - rc = 0; 3050 - } 3051 - 3052 - err_wdata: 3053 - kref_put(&wdata->refcount, cifs_writedata_release); 3054 - err_uncredit: 3055 - add_credits_and_wake_if(server, credits, 0); 3056 - err_close: 3057 - if (cfile) 3058 - cifsFileInfo_put(cfile); 3059 - err_xid: 3060 - free_xid(xid); 3061 - if (rc == 0) { 3062 - wbc->nr_to_write = count; 3063 - rc = len; 3064 - } else if (is_retryable_error(rc)) { 3065 - cifs_pages_write_redirty(inode, start, len); 3066 - } else { 3067 - cifs_pages_write_failed(inode, start, len); 3068 - mapping_set_error(mapping, rc); 3069 - } 3070 - /* Indication to update ctime and mtime as close is deferred */ 3071 - set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3072 - return rc; 3073 - } 3074 - 3075 - /* 3076 - * write a region of pages back to the server 3077 - */ 3078 - static ssize_t cifs_writepages_begin(struct address_space *mapping, 3079 - struct writeback_control *wbc, 3080 - struct xa_state *xas, 3081 - unsigned long long *_start, 3082 - unsigned long long end) 3083 - { 3084 - struct folio *folio; 3085 - unsigned long long start = *_start; 3086 - ssize_t ret; 3087 - int skips = 0; 3088 - 3089 - search_again: 3090 - /* Find the first dirty page. 
*/ 3091 - rcu_read_lock(); 3092 - 3093 - for (;;) { 3094 - folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 3095 - if (xas_retry(xas, folio) || xa_is_value(folio)) 3096 - continue; 3097 - if (!folio) 3098 - break; 3099 - 3100 - if (!folio_try_get_rcu(folio)) { 3101 - xas_reset(xas); 3102 - continue; 3103 - } 3104 - 3105 - if (unlikely(folio != xas_reload(xas))) { 3106 - folio_put(folio); 3107 - xas_reset(xas); 3108 - continue; 3109 - } 3110 - 3111 - xas_pause(xas); 3112 - break; 3113 - } 3114 - rcu_read_unlock(); 3115 - if (!folio) 3116 - return 0; 3117 - 3118 - start = folio_pos(folio); /* May regress with THPs */ 3119 - 3120 - /* At this point we hold neither the i_pages lock nor the page lock: 3121 - * the page may be truncated or invalidated (changing page->mapping to 3122 - * NULL), or even swizzled back from swapper_space to tmpfs file 3123 - * mapping 3124 - */ 3125 - lock_again: 3126 - if (wbc->sync_mode != WB_SYNC_NONE) { 3127 - ret = folio_lock_killable(folio); 3128 - if (ret < 0) 3129 - return ret; 3130 - } else { 3131 - if (!folio_trylock(folio)) 3132 - goto search_again; 3133 - } 3134 - 3135 - if (folio->mapping != mapping || 3136 - !folio_test_dirty(folio)) { 3137 - start += folio_size(folio); 3138 - folio_unlock(folio); 3139 - goto search_again; 3140 - } 3141 - 3142 - if (folio_test_writeback(folio) || 3143 - folio_test_fscache(folio)) { 3144 - folio_unlock(folio); 3145 - if (wbc->sync_mode != WB_SYNC_NONE) { 3146 - folio_wait_writeback(folio); 3147 - #ifdef CONFIG_CIFS_FSCACHE 3148 - folio_wait_fscache(folio); 3149 - #endif 3150 - goto lock_again; 3151 - } 3152 - 3153 - start += folio_size(folio); 3154 - if (wbc->sync_mode == WB_SYNC_NONE) { 3155 - if (skips >= 5 || need_resched()) { 3156 - ret = 0; 3157 - goto out; 3158 - } 3159 - skips++; 3160 - } 3161 - goto search_again; 3162 - } 3163 - 3164 - ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3165 - out: 3166 - if (ret > 0) 3167 - *_start = start + 
ret; 3168 - return ret; 3169 - } 3170 - 3171 - /* 3172 - * Write a region of pages back to the server 3173 - */ 3174 - static int cifs_writepages_region(struct address_space *mapping, 3175 - struct writeback_control *wbc, 3176 - unsigned long long *_start, 3177 - unsigned long long end) 3178 - { 3179 - ssize_t ret; 3180 - 3181 - XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3182 - 3183 - do { 3184 - ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3185 - if (ret > 0 && wbc->nr_to_write > 0) 3186 - cond_resched(); 3187 - } while (ret > 0 && wbc->nr_to_write > 0); 3188 - 3189 - return ret > 0 ? 0 : ret; 3190 - } 3191 - 3192 - /* 3193 - * Write some of the pending data back to the server 3194 - */ 3195 - static int cifs_writepages(struct address_space *mapping, 3196 - struct writeback_control *wbc) 3197 - { 3198 - loff_t start, end; 3199 - int ret; 3200 - 3201 - /* We have to be careful as we can end up racing with setattr() 3202 - * truncating the pagecache since the caller doesn't take a lock here 3203 - * to prevent it. 
3204 - */ 3205 - 3206 - if (wbc->range_cyclic && mapping->writeback_index) { 3207 - start = mapping->writeback_index * PAGE_SIZE; 3208 - ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3209 - if (ret < 0) 3210 - goto out; 3211 - 3212 - if (wbc->nr_to_write <= 0) { 3213 - mapping->writeback_index = start / PAGE_SIZE; 3214 - goto out; 3215 - } 3216 - 3217 - start = 0; 3218 - end = mapping->writeback_index * PAGE_SIZE; 3219 - mapping->writeback_index = 0; 3220 - ret = cifs_writepages_region(mapping, wbc, &start, end); 3221 - if (ret == 0) 3222 - mapping->writeback_index = start / PAGE_SIZE; 3223 - } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3224 - start = 0; 3225 - ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3226 - if (wbc->nr_to_write > 0 && ret == 0) 3227 - mapping->writeback_index = start / PAGE_SIZE; 3228 - } else { 3229 - start = wbc->range_start; 3230 - ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3231 - } 3232 - 3233 - out: 3234 - return ret; 3235 - } 3236 - 3237 - static int 3238 - cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3239 - { 3240 - int rc; 3241 - unsigned int xid; 3242 - 3243 - xid = get_xid(); 3244 - /* BB add check for wbc flags */ 3245 - get_page(page); 3246 - if (!PageUptodate(page)) 3247 - cifs_dbg(FYI, "ppw - page not up to date\n"); 3248 - 3249 - /* 3250 - * Set the "writeback" flag, and clear "dirty" in the radix tree. 3251 - * 3252 - * A writepage() implementation always needs to do either this, 3253 - * or re-dirty the page with "redirty_page_for_writepage()" in 3254 - * the case of a failure. 3255 - * 3256 - * Just unlocking the page will cause the radix tree tag-bits 3257 - * to fail to update with the state of the page correctly. 
3258 - */ 3259 - set_page_writeback(page); 3260 - retry_write: 3261 - rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3262 - if (is_retryable_error(rc)) { 3263 - if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3264 - goto retry_write; 3265 - redirty_page_for_writepage(wbc, page); 3266 - } else if (rc != 0) { 3267 - SetPageError(page); 3268 - mapping_set_error(page->mapping, rc); 3269 - } else { 3270 - SetPageUptodate(page); 3271 - } 3272 - end_page_writeback(page); 3273 - put_page(page); 3274 - free_xid(xid); 3275 - return rc; 3276 - } 3277 - 3278 - static int cifs_write_end(struct file *file, struct address_space *mapping, 3279 - loff_t pos, unsigned len, unsigned copied, 3280 - struct page *page, void *fsdata) 3281 - { 3282 - int rc; 3283 - struct inode *inode = mapping->host; 3284 - struct cifsFileInfo *cfile = file->private_data; 3285 - struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3286 - struct folio *folio = page_folio(page); 3287 - __u32 pid; 3288 - 3289 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3290 - pid = cfile->pid; 3291 - else 3292 - pid = current->tgid; 3293 - 3294 - cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3295 - page, pos, copied); 3296 - 3297 - if (folio_test_checked(folio)) { 3298 - if (copied == len) 3299 - folio_mark_uptodate(folio); 3300 - folio_clear_checked(folio); 3301 - } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3302 - folio_mark_uptodate(folio); 3303 - 3304 - if (!folio_test_uptodate(folio)) { 3305 - char *page_data; 3306 - unsigned offset = pos & (PAGE_SIZE - 1); 3307 - unsigned int xid; 3308 - 3309 - xid = get_xid(); 3310 - /* this is probably better than directly calling 3311 - partialpage_write since in this function the file handle is 3312 - known which we might as well leverage */ 3313 - /* BB check if anything else missing out of ppw 3314 - such as updating last write time */ 3315 - page_data = kmap(page); 3316 - rc = cifs_write(cfile, pid, page_data + 
offset, copied, &pos); 3317 - /* if (rc < 0) should we set writebehind rc? */ 3318 - kunmap(page); 3319 - 3320 - free_xid(xid); 3321 - } else { 3322 - rc = copied; 3323 - pos += copied; 3324 - set_page_dirty(page); 3325 - } 3326 - 3327 - if (rc > 0) { 3328 - spin_lock(&inode->i_lock); 3329 - if (pos > inode->i_size) { 3330 - loff_t additional_blocks = (512 - 1 + copied) >> 9; 3331 - 3332 - i_size_write(inode, pos); 3333 - /* 3334 - * Estimate new allocation size based on the amount written. 3335 - * This will be updated from server on close (and on queryinfo) 3336 - */ 3337 - inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3338 - inode->i_blocks + additional_blocks); 3339 - } 3340 - spin_unlock(&inode->i_lock); 3341 - } 3342 - 3343 - unlock_page(page); 3344 - put_page(page); 3345 - /* Indication to update ctime and mtime as close is deferred */ 3346 - set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3347 - 3348 - return rc; 3349 - } 3350 - 3351 2515 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3352 2516 int datasync) 3353 2517 { ··· 2674 3294 return rc; 2675 3295 } 2676 3296 3297 + /* 3298 + * Flush data on a non-strict file.
3299 + */ 2677 3300 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 2678 3301 { 2679 3302 unsigned int xid; ··· 2743 3360 return rc; 2744 3361 } 2745 3362 2746 - static void 2747 - cifs_uncached_writedata_release(struct kref *refcount) 2748 - { 2749 - struct cifs_writedata *wdata = container_of(refcount, 2750 - struct cifs_writedata, refcount); 2751 - 2752 - kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 2753 - cifs_writedata_release(refcount); 2754 - } 2755 - 2756 - static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 2757 - 2758 - static void 2759 - cifs_uncached_writev_complete(struct work_struct *work) 2760 - { 2761 - struct cifs_writedata *wdata = container_of(work, 2762 - struct cifs_writedata, work); 2763 - struct inode *inode = d_inode(wdata->cfile->dentry); 2764 - struct cifsInodeInfo *cifsi = CIFS_I(inode); 2765 - 2766 - spin_lock(&inode->i_lock); 2767 - cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 2768 - if (cifsi->netfs.remote_i_size > inode->i_size) 2769 - i_size_write(inode, cifsi->netfs.remote_i_size); 2770 - spin_unlock(&inode->i_lock); 2771 - 2772 - complete(&wdata->done); 2773 - collect_uncached_write_data(wdata->ctx); 2774 - /* the below call can possibly free the last ref to aio ctx */ 2775 - kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2776 - } 2777 - 2778 - static int 2779 - cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 2780 - struct cifs_aio_ctx *ctx) 2781 - { 2782 - unsigned int wsize; 2783 - struct cifs_credits credits; 2784 - int rc; 2785 - struct TCP_Server_Info *server = wdata->server; 2786 - 2787 - do { 2788 - if (wdata->cfile->invalidHandle) { 2789 - rc = cifs_reopen_file(wdata->cfile, false); 2790 - if (rc == -EAGAIN) 2791 - continue; 2792 - else if (rc) 2793 - break; 2794 - } 2795 - 2796 - 2797 - /* 2798 - * Wait for credits to resend this wdata. 
2799 - * Note: we are attempting to resend the whole wdata not in 2800 - * segments 2801 - */ 2802 - do { 2803 - rc = server->ops->wait_mtu_credits(server, wdata->bytes, 2804 - &wsize, &credits); 2805 - if (rc) 2806 - goto fail; 2807 - 2808 - if (wsize < wdata->bytes) { 2809 - add_credits_and_wake_if(server, &credits, 0); 2810 - msleep(1000); 2811 - } 2812 - } while (wsize < wdata->bytes); 2813 - wdata->credits = credits; 2814 - 2815 - rc = adjust_credits(server, &wdata->credits, wdata->bytes); 2816 - 2817 - if (!rc) { 2818 - if (wdata->cfile->invalidHandle) 2819 - rc = -EAGAIN; 2820 - else { 2821 - wdata->replay = true; 2822 - #ifdef CONFIG_CIFS_SMB_DIRECT 2823 - if (wdata->mr) { 2824 - wdata->mr->need_invalidate = true; 2825 - smbd_deregister_mr(wdata->mr); 2826 - wdata->mr = NULL; 2827 - } 2828 - #endif 2829 - rc = server->ops->async_writev(wdata, 2830 - cifs_uncached_writedata_release); 2831 - } 2832 - } 2833 - 2834 - /* If the write was successfully sent, we are done */ 2835 - if (!rc) { 2836 - list_add_tail(&wdata->list, wdata_list); 2837 - return 0; 2838 - } 2839 - 2840 - /* Roll back credits and retry if needed */ 2841 - add_credits_and_wake_if(server, &wdata->credits, 0); 2842 - } while (rc == -EAGAIN); 2843 - 2844 - fail: 2845 - kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2846 - return rc; 2847 - } 2848 - 2849 - /* 2850 - * Select span of a bvec iterator we're going to use. Limit it by both maximum 2851 - * size and maximum number of segments. 
2852 - */ 2853 - static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 2854 - size_t max_segs, unsigned int *_nsegs) 2855 - { 2856 - const struct bio_vec *bvecs = iter->bvec; 2857 - unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 2858 - size_t len, span = 0, n = iter->count; 2859 - size_t skip = iter->iov_offset; 2860 - 2861 - if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 2862 - return 0; 2863 - 2864 - while (n && ix < nbv && skip) { 2865 - len = bvecs[ix].bv_len; 2866 - if (skip < len) 2867 - break; 2868 - skip -= len; 2869 - n -= len; 2870 - ix++; 2871 - } 2872 - 2873 - while (n && ix < nbv) { 2874 - len = min3(n, bvecs[ix].bv_len - skip, max_size); 2875 - span += len; 2876 - max_size -= len; 2877 - nsegs++; 2878 - ix++; 2879 - if (max_size == 0 || nsegs >= max_segs) 2880 - break; 2881 - skip = 0; 2882 - n -= len; 2883 - } 2884 - 2885 - *_nsegs = nsegs; 2886 - return span; 2887 - } 2888 - 2889 - static int 2890 - cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 2891 - struct cifsFileInfo *open_file, 2892 - struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 2893 - struct cifs_aio_ctx *ctx) 2894 - { 2895 - int rc = 0; 2896 - size_t cur_len, max_len; 2897 - struct cifs_writedata *wdata; 2898 - pid_t pid; 2899 - struct TCP_Server_Info *server; 2900 - unsigned int xid, max_segs = INT_MAX; 2901 - 2902 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2903 - pid = open_file->pid; 2904 - else 2905 - pid = current->tgid; 2906 - 2907 - server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 2908 - xid = get_xid(); 2909 - 2910 - #ifdef CONFIG_CIFS_SMB_DIRECT 2911 - if (server->smbd_conn) 2912 - max_segs = server->smbd_conn->max_frmr_depth; 2913 - #endif 2914 - 2915 - do { 2916 - struct cifs_credits credits_on_stack; 2917 - struct cifs_credits *credits = &credits_on_stack; 2918 - unsigned int wsize, nsegs = 0; 2919 - 2920 - if (signal_pending(current)) { 2921 - rc = -EINTR; 2922 - break; 2923 - } 
2924 - 2925 - if (open_file->invalidHandle) { 2926 - rc = cifs_reopen_file(open_file, false); 2927 - if (rc == -EAGAIN) 2928 - continue; 2929 - else if (rc) 2930 - break; 2931 - } 2932 - 2933 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2934 - &wsize, credits); 2935 - if (rc) 2936 - break; 2937 - 2938 - max_len = min_t(const size_t, len, wsize); 2939 - if (!max_len) { 2940 - rc = -EAGAIN; 2941 - add_credits_and_wake_if(server, credits, 0); 2942 - break; 2943 - } 2944 - 2945 - cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 2946 - cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 2947 - cur_len, max_len, nsegs, from->nr_segs, max_segs); 2948 - if (cur_len == 0) { 2949 - rc = -EIO; 2950 - add_credits_and_wake_if(server, credits, 0); 2951 - break; 2952 - } 2953 - 2954 - wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 2955 - if (!wdata) { 2956 - rc = -ENOMEM; 2957 - add_credits_and_wake_if(server, credits, 0); 2958 - break; 2959 - } 2960 - 2961 - wdata->sync_mode = WB_SYNC_ALL; 2962 - wdata->offset = (__u64)fpos; 2963 - wdata->cfile = cifsFileInfo_get(open_file); 2964 - wdata->server = server; 2965 - wdata->pid = pid; 2966 - wdata->bytes = cur_len; 2967 - wdata->credits = credits_on_stack; 2968 - wdata->iter = *from; 2969 - wdata->ctx = ctx; 2970 - kref_get(&ctx->refcount); 2971 - 2972 - iov_iter_truncate(&wdata->iter, cur_len); 2973 - 2974 - rc = adjust_credits(server, &wdata->credits, wdata->bytes); 2975 - 2976 - if (!rc) { 2977 - if (wdata->cfile->invalidHandle) 2978 - rc = -EAGAIN; 2979 - else 2980 - rc = server->ops->async_writev(wdata, 2981 - cifs_uncached_writedata_release); 2982 - } 2983 - 2984 - if (rc) { 2985 - add_credits_and_wake_if(server, &wdata->credits, 0); 2986 - kref_put(&wdata->refcount, 2987 - cifs_uncached_writedata_release); 2988 - if (rc == -EAGAIN) 2989 - continue; 2990 - break; 2991 - } 2992 - 2993 - list_add_tail(&wdata->list, wdata_list); 2994 - iov_iter_advance(from, cur_len); 
2995 - fpos += cur_len; 2996 - len -= cur_len; 2997 - } while (len > 0); 2998 - 2999 - free_xid(xid); 3000 - return rc; 3001 - } 3002 - 3003 - static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3004 - { 3005 - struct cifs_writedata *wdata, *tmp; 3006 - struct cifs_tcon *tcon; 3007 - struct cifs_sb_info *cifs_sb; 3008 - struct dentry *dentry = ctx->cfile->dentry; 3009 - ssize_t rc; 3010 - 3011 - tcon = tlink_tcon(ctx->cfile->tlink); 3012 - cifs_sb = CIFS_SB(dentry->d_sb); 3013 - 3014 - mutex_lock(&ctx->aio_mutex); 3015 - 3016 - if (list_empty(&ctx->list)) { 3017 - mutex_unlock(&ctx->aio_mutex); 3018 - return; 3019 - } 3020 - 3021 - rc = ctx->rc; 3022 - /* 3023 - * Wait for and collect replies for any successful sends in order of 3024 - * increasing offset. Once an error is hit, then return without waiting 3025 - * for any more replies. 3026 - */ 3027 - restart_loop: 3028 - list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3029 - if (!rc) { 3030 - if (!try_wait_for_completion(&wdata->done)) { 3031 - mutex_unlock(&ctx->aio_mutex); 3032 - return; 3033 - } 3034 - 3035 - if (wdata->result) 3036 - rc = wdata->result; 3037 - else 3038 - ctx->total_len += wdata->bytes; 3039 - 3040 - /* resend call if it's a retryable error */ 3041 - if (rc == -EAGAIN) { 3042 - struct list_head tmp_list; 3043 - struct iov_iter tmp_from = ctx->iter; 3044 - 3045 - INIT_LIST_HEAD(&tmp_list); 3046 - list_del_init(&wdata->list); 3047 - 3048 - if (ctx->direct_io) 3049 - rc = cifs_resend_wdata( 3050 - wdata, &tmp_list, ctx); 3051 - else { 3052 - iov_iter_advance(&tmp_from, 3053 - wdata->offset - ctx->pos); 3054 - 3055 - rc = cifs_write_from_iter(wdata->offset, 3056 - wdata->bytes, &tmp_from, 3057 - ctx->cfile, cifs_sb, &tmp_list, 3058 - ctx); 3059 - 3060 - kref_put(&wdata->refcount, 3061 - cifs_uncached_writedata_release); 3062 - } 3063 - 3064 - list_splice(&tmp_list, &ctx->list); 3065 - goto restart_loop; 3066 - } 3067 - } 3068 - list_del_init(&wdata->list); 3069 - 
kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3070 - } 3071 - 3072 - cifs_stats_bytes_written(tcon, ctx->total_len); 3073 - set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3074 - 3075 - ctx->rc = (rc == 0) ? ctx->total_len : rc; 3076 - 3077 - mutex_unlock(&ctx->aio_mutex); 3078 - 3079 - if (ctx->iocb && ctx->iocb->ki_complete) 3080 - ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3081 - else 3082 - complete(&ctx->done); 3083 - } 3084 - 3085 - static ssize_t __cifs_writev( 3086 - struct kiocb *iocb, struct iov_iter *from, bool direct) 3087 - { 3088 - struct file *file = iocb->ki_filp; 3089 - ssize_t total_written = 0; 3090 - struct cifsFileInfo *cfile; 3091 - struct cifs_tcon *tcon; 3092 - struct cifs_sb_info *cifs_sb; 3093 - struct cifs_aio_ctx *ctx; 3094 - int rc; 3095 - 3096 - rc = generic_write_checks(iocb, from); 3097 - if (rc <= 0) 3098 - return rc; 3099 - 3100 - cifs_sb = CIFS_FILE_SB(file); 3101 - cfile = file->private_data; 3102 - tcon = tlink_tcon(cfile->tlink); 3103 - 3104 - if (!tcon->ses->server->ops->async_writev) 3105 - return -ENOSYS; 3106 - 3107 - ctx = cifs_aio_ctx_alloc(); 3108 - if (!ctx) 3109 - return -ENOMEM; 3110 - 3111 - ctx->cfile = cifsFileInfo_get(cfile); 3112 - 3113 - if (!is_sync_kiocb(iocb)) 3114 - ctx->iocb = iocb; 3115 - 3116 - ctx->pos = iocb->ki_pos; 3117 - ctx->direct_io = direct; 3118 - ctx->nr_pinned_pages = 0; 3119 - 3120 - if (user_backed_iter(from)) { 3121 - /* 3122 - * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3123 - * they contain references to the calling process's virtual 3124 - * memory layout which won't be available in an async worker 3125 - * thread. This also takes a pin on every folio involved. 
3126 - */ 3127 - rc = netfs_extract_user_iter(from, iov_iter_count(from), 3128 - &ctx->iter, 0); 3129 - if (rc < 0) { 3130 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3131 - return rc; 3132 - } 3133 - 3134 - ctx->nr_pinned_pages = rc; 3135 - ctx->bv = (void *)ctx->iter.bvec; 3136 - ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3137 - } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3138 - !is_sync_kiocb(iocb)) { 3139 - /* 3140 - * If the op is asynchronous, we need to copy the list attached 3141 - * to a BVEC/KVEC-type iterator, but we assume that the storage 3142 - * will be pinned by the caller; in any case, we may or may not 3143 - * be able to pin the pages, so we don't try. 3144 - */ 3145 - ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3146 - if (!ctx->bv) { 3147 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3148 - return -ENOMEM; 3149 - } 3150 - } else { 3151 - /* 3152 - * Otherwise, we just pass the iterator down as-is and rely on 3153 - * the caller to make sure the pages referred to by the 3154 - * iterator don't evaporate. 3155 - */ 3156 - ctx->iter = *from; 3157 - } 3158 - 3159 - ctx->len = iov_iter_count(&ctx->iter); 3160 - 3161 - /* grab a lock here due to read response handlers can access ctx */ 3162 - mutex_lock(&ctx->aio_mutex); 3163 - 3164 - rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3165 - cfile, cifs_sb, &ctx->list, ctx); 3166 - 3167 - /* 3168 - * If at least one write was successfully sent, then discard any rc 3169 - * value from the later writes. If the other write succeeds, then 3170 - * we'll end up returning whatever was written. If it fails, then 3171 - * we'll get a new rc value from that. 
3172 - */ 3173 - if (!list_empty(&ctx->list)) 3174 - rc = 0; 3175 - 3176 - mutex_unlock(&ctx->aio_mutex); 3177 - 3178 - if (rc) { 3179 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3180 - return rc; 3181 - } 3182 - 3183 - if (!is_sync_kiocb(iocb)) { 3184 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3185 - return -EIOCBQUEUED; 3186 - } 3187 - 3188 - rc = wait_for_completion_killable(&ctx->done); 3189 - if (rc) { 3190 - mutex_lock(&ctx->aio_mutex); 3191 - ctx->rc = rc = -EINTR; 3192 - total_written = ctx->total_len; 3193 - mutex_unlock(&ctx->aio_mutex); 3194 - } else { 3195 - rc = ctx->rc; 3196 - total_written = ctx->total_len; 3197 - } 3198 - 3199 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3200 - 3201 - if (unlikely(!total_written)) 3202 - return rc; 3203 - 3204 - iocb->ki_pos += total_written; 3205 - return total_written; 3206 - } 3207 - 3208 - ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3209 - { 3210 - struct file *file = iocb->ki_filp; 3211 - 3212 - cifs_revalidate_mapping(file->f_inode); 3213 - return __cifs_writev(iocb, from, true); 3214 - } 3215 - 3216 - ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3217 - { 3218 - return __cifs_writev(iocb, from, false); 3219 - } 3220 - 3221 3363 static ssize_t 3222 3364 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3223 3365 { ··· 2753 3845 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 2754 3846 ssize_t rc; 2755 3847 2756 - inode_lock(inode); 3848 + rc = netfs_start_io_write(inode); 3849 + if (rc < 0) 3850 + return rc; 3851 + 2757 3852 /* 2758 3853 * We need to hold the sem to be sure nobody modifies lock list 2759 3854 * with a brlock that prevents writing. 
··· 2770 3859 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 2771 3860 server->vals->exclusive_lock_type, 0, 2772 3861 NULL, CIFS_WRITE_OP)) 2773 - rc = __generic_file_write_iter(iocb, from); 3862 + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); 2774 3863 else 2775 3864 rc = -EACCES; 2776 3865 out: 2777 3866 up_read(&cinode->lock_sem); 2778 - inode_unlock(inode); 2779 - 3867 + netfs_end_io_write(inode); 2780 3868 if (rc > 0) 2781 3869 rc = generic_write_sync(iocb, rc); 2782 3870 return rc; ··· 2798 3888 2799 3889 if (CIFS_CACHE_WRITE(cinode)) { 2800 3890 if (cap_unix(tcon->ses) && 2801 - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 2802 - && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 2803 - written = generic_file_write_iter(iocb, from); 3891 + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 3892 + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3893 + written = netfs_file_write_iter(iocb, from); 2804 3894 goto out; 2805 3895 } 2806 3896 written = cifs_writev(iocb, from); ··· 2812 3902 * affected pages because it may cause a error with mandatory locks on 2813 3903 * these pages but not on the region from pos to ppos+len-1. 
2814 3904 */ 2815 - written = cifs_user_writev(iocb, from); 3905 + written = netfs_file_write_iter(iocb, from); 2816 3906 if (CIFS_CACHE_READ(cinode)) { 2817 3907 /* 2818 3908 * We have read level caching and we have just sent a write ··· 2831 3921 return written; 2832 3922 } 2833 3923 2834 - static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3924 + ssize_t cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) 2835 3925 { 2836 - struct cifs_readdata *rdata; 3926 + ssize_t rc; 3927 + struct inode *inode = file_inode(iocb->ki_filp); 2837 3928 2838 - rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 2839 - if (rdata) { 2840 - kref_init(&rdata->refcount); 2841 - INIT_LIST_HEAD(&rdata->list); 2842 - init_completion(&rdata->done); 2843 - INIT_WORK(&rdata->work, complete); 2844 - } 3929 + if (iocb->ki_flags & IOCB_DIRECT) 3930 + return netfs_unbuffered_read_iter(iocb, iter); 2845 3931 2846 - return rdata; 2847 - } 2848 - 2849 - void 2850 - cifs_readdata_release(struct kref *refcount) 2851 - { 2852 - struct cifs_readdata *rdata = container_of(refcount, 2853 - struct cifs_readdata, refcount); 2854 - 2855 - if (rdata->ctx) 2856 - kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 2857 - #ifdef CONFIG_CIFS_SMB_DIRECT 2858 - if (rdata->mr) { 2859 - smbd_deregister_mr(rdata->mr); 2860 - rdata->mr = NULL; 2861 - } 2862 - #endif 2863 - if (rdata->cfile) 2864 - cifsFileInfo_put(rdata->cfile); 2865 - 2866 - kfree(rdata); 2867 - } 2868 - 2869 - static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 2870 - 2871 - static void 2872 - cifs_uncached_readv_complete(struct work_struct *work) 2873 - { 2874 - struct cifs_readdata *rdata = container_of(work, 2875 - struct cifs_readdata, work); 2876 - 2877 - complete(&rdata->done); 2878 - collect_uncached_read_data(rdata->ctx); 2879 - /* the below call can possibly free the last ref to aio ctx */ 2880 - kref_put(&rdata->refcount, cifs_readdata_release); 2881 - } 2882 - 2883 - static int 
cifs_resend_rdata(struct cifs_readdata *rdata, 2884 - struct list_head *rdata_list, 2885 - struct cifs_aio_ctx *ctx) 2886 - { 2887 - unsigned int rsize; 2888 - struct cifs_credits credits; 2889 - int rc; 2890 - struct TCP_Server_Info *server; 2891 - 2892 - /* XXX: should we pick a new channel here? */ 2893 - server = rdata->server; 2894 - 2895 - do { 2896 - if (rdata->cfile->invalidHandle) { 2897 - rc = cifs_reopen_file(rdata->cfile, true); 2898 - if (rc == -EAGAIN) 2899 - continue; 2900 - else if (rc) 2901 - break; 2902 - } 2903 - 2904 - /* 2905 - * Wait for credits to resend this rdata. 2906 - * Note: we are attempting to resend the whole rdata not in 2907 - * segments 2908 - */ 2909 - do { 2910 - rc = server->ops->wait_mtu_credits(server, rdata->bytes, 2911 - &rsize, &credits); 2912 - 2913 - if (rc) 2914 - goto fail; 2915 - 2916 - if (rsize < rdata->bytes) { 2917 - add_credits_and_wake_if(server, &credits, 0); 2918 - msleep(1000); 2919 - } 2920 - } while (rsize < rdata->bytes); 2921 - rdata->credits = credits; 2922 - 2923 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 2924 - if (!rc) { 2925 - if (rdata->cfile->invalidHandle) 2926 - rc = -EAGAIN; 2927 - else { 2928 - #ifdef CONFIG_CIFS_SMB_DIRECT 2929 - if (rdata->mr) { 2930 - rdata->mr->need_invalidate = true; 2931 - smbd_deregister_mr(rdata->mr); 2932 - rdata->mr = NULL; 2933 - } 2934 - #endif 2935 - rc = server->ops->async_readv(rdata); 2936 - } 2937 - } 2938 - 2939 - /* If the read was successfully sent, we are done */ 2940 - if (!rc) { 2941 - /* Add to aio pending list */ 2942 - list_add_tail(&rdata->list, rdata_list); 2943 - return 0; 2944 - } 2945 - 2946 - /* Roll back credits and retry if needed */ 2947 - add_credits_and_wake_if(server, &rdata->credits, 0); 2948 - } while (rc == -EAGAIN); 2949 - 2950 - fail: 2951 - kref_put(&rdata->refcount, cifs_readdata_release); 2952 - return rc; 2953 - } 2954 - 2955 - static int 2956 - cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo 
*open_file, 2957 - struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 2958 - struct cifs_aio_ctx *ctx) 2959 - { 2960 - struct cifs_readdata *rdata; 2961 - unsigned int rsize, nsegs, max_segs = INT_MAX; 2962 - struct cifs_credits credits_on_stack; 2963 - struct cifs_credits *credits = &credits_on_stack; 2964 - size_t cur_len, max_len; 2965 - int rc; 2966 - pid_t pid; 2967 - struct TCP_Server_Info *server; 2968 - 2969 - server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 2970 - 2971 - #ifdef CONFIG_CIFS_SMB_DIRECT 2972 - if (server->smbd_conn) 2973 - max_segs = server->smbd_conn->max_frmr_depth; 2974 - #endif 2975 - 2976 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2977 - pid = open_file->pid; 2978 - else 2979 - pid = current->tgid; 2980 - 2981 - do { 2982 - if (open_file->invalidHandle) { 2983 - rc = cifs_reopen_file(open_file, true); 2984 - if (rc == -EAGAIN) 2985 - continue; 2986 - else if (rc) 2987 - break; 2988 - } 2989 - 2990 - if (cifs_sb->ctx->rsize == 0) 2991 - cifs_sb->ctx->rsize = 2992 - server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 2993 - cifs_sb->ctx); 2994 - 2995 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 2996 - &rsize, credits); 2997 - if (rc) 2998 - break; 2999 - 3000 - max_len = min_t(size_t, len, rsize); 3001 - 3002 - cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 3003 - max_segs, &nsegs); 3004 - cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3005 - cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 3006 - if (cur_len == 0) { 3007 - rc = -EIO; 3008 - add_credits_and_wake_if(server, credits, 0); 3009 - break; 3010 - } 3011 - 3012 - rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 3013 - if (!rdata) { 3014 - add_credits_and_wake_if(server, credits, 0); 3015 - rc = -ENOMEM; 3016 - break; 3017 - } 3018 - 3019 - rdata->server = server; 3020 - rdata->cfile = cifsFileInfo_get(open_file); 3021 - rdata->offset = fpos; 3022 - rdata->bytes = cur_len; 3023 - 
rdata->pid = pid; 3024 - rdata->credits = credits_on_stack; 3025 - rdata->ctx = ctx; 3026 - kref_get(&ctx->refcount); 3027 - 3028 - rdata->iter = ctx->iter; 3029 - iov_iter_truncate(&rdata->iter, cur_len); 3030 - 3031 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3032 - 3033 - if (!rc) { 3034 - if (rdata->cfile->invalidHandle) 3035 - rc = -EAGAIN; 3036 - else 3037 - rc = server->ops->async_readv(rdata); 3038 - } 3039 - 3040 - if (rc) { 3041 - add_credits_and_wake_if(server, &rdata->credits, 0); 3042 - kref_put(&rdata->refcount, cifs_readdata_release); 3043 - if (rc == -EAGAIN) 3044 - continue; 3045 - break; 3046 - } 3047 - 3048 - list_add_tail(&rdata->list, rdata_list); 3049 - iov_iter_advance(&ctx->iter, cur_len); 3050 - fpos += cur_len; 3051 - len -= cur_len; 3052 - } while (len > 0); 3053 - 3054 - return rc; 3055 - } 3056 - 3057 - static void 3058 - collect_uncached_read_data(struct cifs_aio_ctx *ctx) 3059 - { 3060 - struct cifs_readdata *rdata, *tmp; 3061 - struct cifs_sb_info *cifs_sb; 3062 - int rc; 3063 - 3064 - cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 3065 - 3066 - mutex_lock(&ctx->aio_mutex); 3067 - 3068 - if (list_empty(&ctx->list)) { 3069 - mutex_unlock(&ctx->aio_mutex); 3070 - return; 3071 - } 3072 - 3073 - rc = ctx->rc; 3074 - /* the loop below should proceed in the order of increasing offsets */ 3075 - again: 3076 - list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 3077 - if (!rc) { 3078 - if (!try_wait_for_completion(&rdata->done)) { 3079 - mutex_unlock(&ctx->aio_mutex); 3080 - return; 3081 - } 3082 - 3083 - if (rdata->result == -EAGAIN) { 3084 - /* resend call if it's a retryable error */ 3085 - struct list_head tmp_list; 3086 - unsigned int got_bytes = rdata->got_bytes; 3087 - 3088 - list_del_init(&rdata->list); 3089 - INIT_LIST_HEAD(&tmp_list); 3090 - 3091 - if (ctx->direct_io) { 3092 - /* 3093 - * Re-use rdata as this is a 3094 - * direct I/O 3095 - */ 3096 - rc = cifs_resend_rdata( 3097 - rdata, 3098 - &tmp_list, ctx); 
3099 - } else { 3100 - rc = cifs_send_async_read( 3101 - rdata->offset + got_bytes, 3102 - rdata->bytes - got_bytes, 3103 - rdata->cfile, cifs_sb, 3104 - &tmp_list, ctx); 3105 - 3106 - kref_put(&rdata->refcount, 3107 - cifs_readdata_release); 3108 - } 3109 - 3110 - list_splice(&tmp_list, &ctx->list); 3111 - 3112 - goto again; 3113 - } else if (rdata->result) 3114 - rc = rdata->result; 3115 - 3116 - /* if there was a short read -- discard anything left */ 3117 - if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 3118 - rc = -ENODATA; 3119 - 3120 - ctx->total_len += rdata->got_bytes; 3121 - } 3122 - list_del_init(&rdata->list); 3123 - kref_put(&rdata->refcount, cifs_readdata_release); 3124 - } 3125 - 3126 - /* mask nodata case */ 3127 - if (rc == -ENODATA) 3128 - rc = 0; 3129 - 3130 - ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc; 3131 - 3132 - mutex_unlock(&ctx->aio_mutex); 3133 - 3134 - if (ctx->iocb && ctx->iocb->ki_complete) 3135 - ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3136 - else 3137 - complete(&ctx->done); 3138 - } 3139 - 3140 - static ssize_t __cifs_readv( 3141 - struct kiocb *iocb, struct iov_iter *to, bool direct) 3142 - { 3143 - size_t len; 3144 - struct file *file = iocb->ki_filp; 3145 - struct cifs_sb_info *cifs_sb; 3146 - struct cifsFileInfo *cfile; 3147 - struct cifs_tcon *tcon; 3148 - ssize_t rc, total_read = 0; 3149 - loff_t offset = iocb->ki_pos; 3150 - struct cifs_aio_ctx *ctx; 3151 - 3152 - len = iov_iter_count(to); 3153 - if (!len) 3154 - return 0; 3155 - 3156 - cifs_sb = CIFS_FILE_SB(file); 3157 - cfile = file->private_data; 3158 - tcon = tlink_tcon(cfile->tlink); 3159 - 3160 - if (!tcon->ses->server->ops->async_readv) 3161 - return -ENOSYS; 3162 - 3163 - if ((file->f_flags & O_ACCMODE) == O_WRONLY) 3164 - cifs_dbg(FYI, "attempting read on write only file instance\n"); 3165 - 3166 - ctx = cifs_aio_ctx_alloc(); 3167 - if (!ctx) 3168 - return -ENOMEM; 3169 - 3170 - ctx->pos = offset; 3171 - ctx->direct_io = direct; 3172 - ctx->len 
= len; 3173 - ctx->cfile = cifsFileInfo_get(cfile); 3174 - ctx->nr_pinned_pages = 0; 3175 - 3176 - if (!is_sync_kiocb(iocb)) 3177 - ctx->iocb = iocb; 3178 - 3179 - if (user_backed_iter(to)) { 3180 - /* 3181 - * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3182 - * they contain references to the calling process's virtual 3183 - * memory layout which won't be available in an async worker 3184 - * thread. This also takes a pin on every folio involved. 3185 - */ 3186 - rc = netfs_extract_user_iter(to, iov_iter_count(to), 3187 - &ctx->iter, 0); 3188 - if (rc < 0) { 3189 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3190 - return rc; 3191 - } 3192 - 3193 - ctx->nr_pinned_pages = rc; 3194 - ctx->bv = (void *)ctx->iter.bvec; 3195 - ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 3196 - ctx->should_dirty = true; 3197 - } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 3198 - !is_sync_kiocb(iocb)) { 3199 - /* 3200 - * If the op is asynchronous, we need to copy the list attached 3201 - * to a BVEC/KVEC-type iterator, but we assume that the storage 3202 - * will be retained by the caller; in any case, we may or may 3203 - * not be able to pin the pages, so we don't try. 3204 - */ 3205 - ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 3206 - if (!ctx->bv) { 3207 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3208 - return -ENOMEM; 3209 - } 3210 - } else { 3211 - /* 3212 - * Otherwise, we just pass the iterator down as-is and rely on 3213 - * the caller to make sure the pages referred to by the 3214 - * iterator don't evaporate. 
3215 - */ 3216 - ctx->iter = *to; 3217 - } 3218 - 3219 - if (direct) { 3220 - rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 3221 - offset, offset + len - 1); 3222 - if (rc) { 3223 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3224 - return -EAGAIN; 3225 - } 3226 - } 3227 - 3228 - /* grab a lock here due to read response handlers can access ctx */ 3229 - mutex_lock(&ctx->aio_mutex); 3230 - 3231 - rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 3232 - 3233 - /* if at least one read request send succeeded, then reset rc */ 3234 - if (!list_empty(&ctx->list)) 3235 - rc = 0; 3236 - 3237 - mutex_unlock(&ctx->aio_mutex); 3238 - 3239 - if (rc) { 3240 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3932 + rc = cifs_revalidate_mapping(inode); 3933 + if (rc) 3241 3934 return rc; 3242 - } 3243 3935 3244 - if (!is_sync_kiocb(iocb)) { 3245 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3246 - return -EIOCBQUEUED; 3247 - } 3248 - 3249 - rc = wait_for_completion_killable(&ctx->done); 3250 - if (rc) { 3251 - mutex_lock(&ctx->aio_mutex); 3252 - ctx->rc = rc = -EINTR; 3253 - total_read = ctx->total_len; 3254 - mutex_unlock(&ctx->aio_mutex); 3255 - } else { 3256 - rc = ctx->rc; 3257 - total_read = ctx->total_len; 3258 - } 3259 - 3260 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3261 - 3262 - if (total_read) { 3263 - iocb->ki_pos += total_read; 3264 - return total_read; 3265 - } 3266 - return rc; 3936 + return netfs_file_read_iter(iocb, iter); 3267 3937 } 3268 3938 3269 - ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 3939 + ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 3270 3940 { 3271 - return __cifs_readv(iocb, to, true); 3272 - } 3941 + struct inode *inode = file_inode(iocb->ki_filp); 3942 + struct cifsInodeInfo *cinode = CIFS_I(inode); 3943 + ssize_t written; 3944 + int rc; 3273 3945 3274 - ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 3275 - { 3276 - return 
__cifs_readv(iocb, to, false); 3946 + if (iocb->ki_filp->f_flags & O_DIRECT) { 3947 + written = netfs_unbuffered_write_iter(iocb, from); 3948 + if (written > 0 && CIFS_CACHE_READ(cinode)) { 3949 + cifs_zap_mapping(inode); 3950 + cifs_dbg(FYI, 3951 + "Set no oplock for inode=%p after a write operation\n", 3952 + inode); 3953 + cinode->oplock = 0; 3954 + } 3955 + return written; 3956 + } 3957 + 3958 + written = cifs_get_writer(cinode); 3959 + if (written) 3960 + return written; 3961 + 3962 + written = netfs_file_write_iter(iocb, from); 3963 + 3964 + if (!CIFS_CACHE_WRITE(CIFS_I(inode))) { 3965 + rc = filemap_fdatawrite(inode->i_mapping); 3966 + if (rc) 3967 + cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", 3968 + rc, inode); 3969 + } 3970 + 3971 + cifs_put_writer(cinode); 3972 + return written; 3277 3973 } 3278 3974 3279 3975 ssize_t ··· 2902 4386 * pos+len-1. 2903 4387 */ 2904 4388 if (!CIFS_CACHE_READ(cinode)) 2905 - return cifs_user_readv(iocb, to); 4389 + return netfs_unbuffered_read_iter(iocb, to); 2906 4390 2907 4391 if (cap_unix(tcon->ses) && 2908 4392 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2909 - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2910 - return generic_file_read_iter(iocb, to); 4393 + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 4394 + if (iocb->ki_flags & IOCB_DIRECT) 4395 + return netfs_unbuffered_read_iter(iocb, to); 4396 + return netfs_buffered_read_iter(iocb, to); 4397 + } 2911 4398 2912 4399 /* 2913 4400 * We need to hold the sem to be sure nobody modifies lock list ··· 2919 4400 down_read(&cinode->lock_sem); 2920 4401 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 2921 4402 tcon->ses->server->vals->shared_lock_type, 2922 - 0, NULL, CIFS_READ_OP)) 2923 - rc = generic_file_read_iter(iocb, to); 4403 + 0, NULL, CIFS_READ_OP)) { 4404 + if (iocb->ki_flags & IOCB_DIRECT) 4405 + rc = netfs_unbuffered_read_iter(iocb, to); 4406 + else 4407 + rc = 
netfs_buffered_read_iter(iocb, to); 4408 + } 2924 4409 up_read(&cinode->lock_sem); 2925 4410 return rc; 2926 4411 } 2927 4412 2928 - static ssize_t 2929 - cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 2930 - { 2931 - int rc = -EACCES; 2932 - unsigned int bytes_read = 0; 2933 - unsigned int total_read; 2934 - unsigned int current_read_size; 2935 - unsigned int rsize; 2936 - struct cifs_sb_info *cifs_sb; 2937 - struct cifs_tcon *tcon; 2938 - struct TCP_Server_Info *server; 2939 - unsigned int xid; 2940 - char *cur_offset; 2941 - struct cifsFileInfo *open_file; 2942 - struct cifs_io_parms io_parms = {0}; 2943 - int buf_type = CIFS_NO_BUFFER; 2944 - __u32 pid; 2945 - 2946 - xid = get_xid(); 2947 - cifs_sb = CIFS_FILE_SB(file); 2948 - 2949 - /* FIXME: set up handlers for larger reads and/or convert to async */ 2950 - rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 2951 - 2952 - if (file->private_data == NULL) { 2953 - rc = -EBADF; 2954 - free_xid(xid); 2955 - return rc; 2956 - } 2957 - open_file = file->private_data; 2958 - tcon = tlink_tcon(open_file->tlink); 2959 - server = cifs_pick_channel(tcon->ses); 2960 - 2961 - if (!server->ops->sync_read) { 2962 - free_xid(xid); 2963 - return -ENOSYS; 2964 - } 2965 - 2966 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2967 - pid = open_file->pid; 2968 - else 2969 - pid = current->tgid; 2970 - 2971 - if ((file->f_flags & O_ACCMODE) == O_WRONLY) 2972 - cifs_dbg(FYI, "attempting read on write only file instance\n"); 2973 - 2974 - for (total_read = 0, cur_offset = read_data; read_size > total_read; 2975 - total_read += bytes_read, cur_offset += bytes_read) { 2976 - do { 2977 - current_read_size = min_t(uint, read_size - total_read, 2978 - rsize); 2979 - /* 2980 - * For windows me and 9x we do not want to request more 2981 - * than it negotiated since it will refuse the read 2982 - * then. 
2983 - */ 2984 - if (!(tcon->ses->capabilities & 2985 - tcon->ses->server->vals->cap_large_files)) { 2986 - current_read_size = min_t(uint, 2987 - current_read_size, CIFSMaxBufSize); 2988 - } 2989 - if (open_file->invalidHandle) { 2990 - rc = cifs_reopen_file(open_file, true); 2991 - if (rc != 0) 2992 - break; 2993 - } 2994 - io_parms.pid = pid; 2995 - io_parms.tcon = tcon; 2996 - io_parms.offset = *offset; 2997 - io_parms.length = current_read_size; 2998 - io_parms.server = server; 2999 - rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 3000 - &bytes_read, &cur_offset, 3001 - &buf_type); 3002 - } while (rc == -EAGAIN); 3003 - 3004 - if (rc || (bytes_read == 0)) { 3005 - if (total_read) { 3006 - break; 3007 - } else { 3008 - free_xid(xid); 3009 - return rc; 3010 - } 3011 - } else { 3012 - cifs_stats_bytes_read(tcon, total_read); 3013 - *offset += bytes_read; 3014 - } 3015 - } 3016 - free_xid(xid); 3017 - return total_read; 3018 - } 3019 - 3020 - /* 3021 - * If the page is mmap'ed into a process' page tables, then we need to make 3022 - * sure that it doesn't change while being written back. 3023 - */ 3024 4413 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 3025 4414 { 3026 - struct folio *folio = page_folio(vmf->page); 3027 - 3028 - /* Wait for the folio to be written to the cache before we allow it to 3029 - * be modified. We then assume the entire folio will need writing back. 
3030 - */ 3031 - #ifdef CONFIG_CIFS_FSCACHE 3032 - if (folio_test_fscache(folio) && 3033 - folio_wait_fscache_killable(folio) < 0) 3034 - return VM_FAULT_RETRY; 3035 - #endif 3036 - 3037 - folio_wait_writeback(folio); 3038 - 3039 - if (folio_lock_killable(folio) < 0) 3040 - return VM_FAULT_RETRY; 3041 - return VM_FAULT_LOCKED; 4415 + return netfs_page_mkwrite(vmf, NULL); 3042 4416 } 3043 4417 3044 4418 static const struct vm_operations_struct cifs_file_vm_ops = { ··· 2972 4560 rc = generic_file_mmap(file, vma); 2973 4561 if (!rc) 2974 4562 vma->vm_ops = &cifs_file_vm_ops; 2975 - 2976 - free_xid(xid); 2977 - return rc; 2978 - } 2979 - 2980 - /* 2981 - * Unlock a bunch of folios in the pagecache. 2982 - */ 2983 - static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 2984 - { 2985 - struct folio *folio; 2986 - XA_STATE(xas, &mapping->i_pages, first); 2987 - 2988 - rcu_read_lock(); 2989 - xas_for_each(&xas, folio, last) { 2990 - folio_unlock(folio); 2991 - } 2992 - rcu_read_unlock(); 2993 - } 2994 - 2995 - static void cifs_readahead_complete(struct work_struct *work) 2996 - { 2997 - struct cifs_readdata *rdata = container_of(work, 2998 - struct cifs_readdata, work); 2999 - struct folio *folio; 3000 - pgoff_t last; 3001 - bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 3002 - 3003 - XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 3004 - 3005 - if (good) 3006 - cifs_readahead_to_fscache(rdata->mapping->host, 3007 - rdata->offset, rdata->bytes); 3008 - 3009 - if (iov_iter_count(&rdata->iter) > 0) 3010 - iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 3011 - 3012 - last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 3013 - 3014 - rcu_read_lock(); 3015 - xas_for_each(&xas, folio, last) { 3016 - if (good) { 3017 - flush_dcache_folio(folio); 3018 - folio_mark_uptodate(folio); 3019 - } 3020 - folio_unlock(folio); 3021 - } 3022 - rcu_read_unlock(); 3023 - 3024 - 
kref_put(&rdata->refcount, cifs_readdata_release); 3025 - } 3026 - 3027 - static void cifs_readahead(struct readahead_control *ractl) 3028 - { 3029 - struct cifsFileInfo *open_file = ractl->file->private_data; 3030 - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 3031 - struct TCP_Server_Info *server; 3032 - unsigned int xid, nr_pages, cache_nr_pages = 0; 3033 - unsigned int ra_pages; 3034 - pgoff_t next_cached = ULONG_MAX, ra_index; 3035 - bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 3036 - cifs_inode_cookie(ractl->mapping->host)->cache_priv; 3037 - bool check_cache = caching; 3038 - pid_t pid; 3039 - int rc = 0; 3040 - 3041 - /* Note that readahead_count() lags behind our dequeuing of pages from 3042 - * the ractl, wo we have to keep track for ourselves. 3043 - */ 3044 - ra_pages = readahead_count(ractl); 3045 - ra_index = readahead_index(ractl); 3046 - 3047 - xid = get_xid(); 3048 - 3049 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3050 - pid = open_file->pid; 3051 - else 3052 - pid = current->tgid; 3053 - 3054 - server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3055 - 3056 - cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 3057 - __func__, ractl->file, ractl->mapping, ra_pages); 3058 - 3059 - /* 3060 - * Chop the readahead request up into rsize-sized read requests. 3061 - */ 3062 - while ((nr_pages = ra_pages)) { 3063 - unsigned int i, rsize; 3064 - struct cifs_readdata *rdata; 3065 - struct cifs_credits credits_on_stack; 3066 - struct cifs_credits *credits = &credits_on_stack; 3067 - struct folio *folio; 3068 - pgoff_t fsize; 3069 - 3070 - /* 3071 - * Find out if we have anything cached in the range of 3072 - * interest, and if so, where the next chunk of cached data is. 
3073 - */ 3074 - if (caching) { 3075 - if (check_cache) { 3076 - rc = cifs_fscache_query_occupancy( 3077 - ractl->mapping->host, ra_index, nr_pages, 3078 - &next_cached, &cache_nr_pages); 3079 - if (rc < 0) 3080 - caching = false; 3081 - check_cache = false; 3082 - } 3083 - 3084 - if (ra_index == next_cached) { 3085 - /* 3086 - * TODO: Send a whole batch of pages to be read 3087 - * by the cache. 3088 - */ 3089 - folio = readahead_folio(ractl); 3090 - fsize = folio_nr_pages(folio); 3091 - ra_pages -= fsize; 3092 - ra_index += fsize; 3093 - if (cifs_readpage_from_fscache(ractl->mapping->host, 3094 - &folio->page) < 0) { 3095 - /* 3096 - * TODO: Deal with cache read failure 3097 - * here, but for the moment, delegate 3098 - * that to readpage. 3099 - */ 3100 - caching = false; 3101 - } 3102 - folio_unlock(folio); 3103 - next_cached += fsize; 3104 - cache_nr_pages -= fsize; 3105 - if (cache_nr_pages == 0) 3106 - check_cache = true; 3107 - continue; 3108 - } 3109 - } 3110 - 3111 - if (open_file->invalidHandle) { 3112 - rc = cifs_reopen_file(open_file, true); 3113 - if (rc) { 3114 - if (rc == -EAGAIN) 3115 - continue; 3116 - break; 3117 - } 3118 - } 3119 - 3120 - if (cifs_sb->ctx->rsize == 0) 3121 - cifs_sb->ctx->rsize = 3122 - server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 3123 - cifs_sb->ctx); 3124 - 3125 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 3126 - &rsize, credits); 3127 - if (rc) 3128 - break; 3129 - nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 3130 - if (next_cached != ULONG_MAX) 3131 - nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 3132 - 3133 - /* 3134 - * Give up immediately if rsize is too small to read an entire 3135 - * page. The VFS will fall back to readpage. We should never 3136 - * reach this point however since we set ra_pages to 0 when the 3137 - * rsize is smaller than a cache page. 
3138 - */ 3139 - if (unlikely(!nr_pages)) { 3140 - add_credits_and_wake_if(server, credits, 0); 3141 - break; 3142 - } 3143 - 3144 - rdata = cifs_readdata_alloc(cifs_readahead_complete); 3145 - if (!rdata) { 3146 - /* best to give up if we're out of mem */ 3147 - add_credits_and_wake_if(server, credits, 0); 3148 - break; 3149 - } 3150 - 3151 - rdata->offset = ra_index * PAGE_SIZE; 3152 - rdata->bytes = nr_pages * PAGE_SIZE; 3153 - rdata->cfile = cifsFileInfo_get(open_file); 3154 - rdata->server = server; 3155 - rdata->mapping = ractl->mapping; 3156 - rdata->pid = pid; 3157 - rdata->credits = credits_on_stack; 3158 - 3159 - for (i = 0; i < nr_pages; i++) { 3160 - if (!readahead_folio(ractl)) 3161 - WARN_ON(1); 3162 - } 3163 - ra_pages -= nr_pages; 3164 - ra_index += nr_pages; 3165 - 3166 - iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 3167 - rdata->offset, rdata->bytes); 3168 - 3169 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3170 - if (!rc) { 3171 - if (rdata->cfile->invalidHandle) 3172 - rc = -EAGAIN; 3173 - else 3174 - rc = server->ops->async_readv(rdata); 3175 - } 3176 - 3177 - if (rc) { 3178 - add_credits_and_wake_if(server, &rdata->credits, 0); 3179 - cifs_unlock_folios(rdata->mapping, 3180 - rdata->offset / PAGE_SIZE, 3181 - (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 3182 - /* Fallback to the readpage in error/reconnect cases */ 3183 - kref_put(&rdata->refcount, cifs_readdata_release); 3184 - break; 3185 - } 3186 - 3187 - kref_put(&rdata->refcount, cifs_readdata_release); 3188 - } 3189 - 3190 - free_xid(xid); 3191 - } 3192 - 3193 - /* 3194 - * cifs_readpage_worker must be called with the page pinned 3195 - */ 3196 - static int cifs_readpage_worker(struct file *file, struct page *page, 3197 - loff_t *poffset) 3198 - { 3199 - struct inode *inode = file_inode(file); 3200 - struct timespec64 atime, mtime; 3201 - char *read_data; 3202 - int rc; 3203 - 3204 - /* Is the page cached? 
*/ 3205 - rc = cifs_readpage_from_fscache(inode, page); 3206 - if (rc == 0) 3207 - goto read_complete; 3208 - 3209 - read_data = kmap(page); 3210 - /* for reads over a certain size could initiate async read ahead */ 3211 - 3212 - rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 3213 - 3214 - if (rc < 0) 3215 - goto io_error; 3216 - else 3217 - cifs_dbg(FYI, "Bytes read %d\n", rc); 3218 - 3219 - /* we do not want atime to be less than mtime, it broke some apps */ 3220 - atime = inode_set_atime_to_ts(inode, current_time(inode)); 3221 - mtime = inode_get_mtime(inode); 3222 - if (timespec64_compare(&atime, &mtime) < 0) 3223 - inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 3224 - 3225 - if (PAGE_SIZE > rc) 3226 - memset(read_data + rc, 0, PAGE_SIZE - rc); 3227 - 3228 - flush_dcache_page(page); 3229 - SetPageUptodate(page); 3230 - rc = 0; 3231 - 3232 - io_error: 3233 - kunmap(page); 3234 - 3235 - read_complete: 3236 - unlock_page(page); 3237 - return rc; 3238 - } 3239 - 3240 - static int cifs_read_folio(struct file *file, struct folio *folio) 3241 - { 3242 - struct page *page = &folio->page; 3243 - loff_t offset = page_file_offset(page); 3244 - int rc = -EACCES; 3245 - unsigned int xid; 3246 - 3247 - xid = get_xid(); 3248 - 3249 - if (file->private_data == NULL) { 3250 - rc = -EBADF; 3251 - free_xid(xid); 3252 - return rc; 3253 - } 3254 - 3255 - cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 3256 - page, (int)offset, (int)offset); 3257 - 3258 - rc = cifs_readpage_worker(file, page, &offset); 3259 4563 3260 4564 free_xid(xid); 3261 4565 return rc; ··· 3022 4894 return false; 3023 4895 } else 3024 4896 return true; 3025 - } 3026 - 3027 - static int cifs_write_begin(struct file *file, struct address_space *mapping, 3028 - loff_t pos, unsigned len, 3029 - struct page **pagep, void **fsdata) 3030 - { 3031 - int oncethru = 0; 3032 - pgoff_t index = pos >> PAGE_SHIFT; 3033 - loff_t offset = pos & (PAGE_SIZE - 1); 3034 - loff_t page_start = pos & PAGE_MASK; 3035 - 
loff_t i_size; 3036 - struct page *page; 3037 - int rc = 0; 3038 - 3039 - cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 3040 - 3041 - start: 3042 - page = grab_cache_page_write_begin(mapping, index); 3043 - if (!page) { 3044 - rc = -ENOMEM; 3045 - goto out; 3046 - } 3047 - 3048 - if (PageUptodate(page)) 3049 - goto out; 3050 - 3051 - /* 3052 - * If we write a full page it will be up to date, no need to read from 3053 - * the server. If the write is short, we'll end up doing a sync write 3054 - * instead. 3055 - */ 3056 - if (len == PAGE_SIZE) 3057 - goto out; 3058 - 3059 - /* 3060 - * optimize away the read when we have an oplock, and we're not 3061 - * expecting to use any of the data we'd be reading in. That 3062 - * is, when the page lies beyond the EOF, or straddles the EOF 3063 - * and the write will cover all of the existing data. 3064 - */ 3065 - if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 3066 - i_size = i_size_read(mapping->host); 3067 - if (page_start >= i_size || 3068 - (offset == 0 && (pos + len) >= i_size)) { 3069 - zero_user_segments(page, 0, offset, 3070 - offset + len, 3071 - PAGE_SIZE); 3072 - /* 3073 - * PageChecked means that the parts of the page 3074 - * to which we're not writing are considered up 3075 - * to date. Once the data is copied to the 3076 - * page, it can be set uptodate. 3077 - */ 3078 - SetPageChecked(page); 3079 - goto out; 3080 - } 3081 - } 3082 - 3083 - if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 3084 - /* 3085 - * might as well read a page, it is fast enough. If we get 3086 - * an error, we don't need to return it. cifs_write_end will 3087 - * do a sync write instead since PG_uptodate isn't set. 
3088 - */ 3089 - cifs_readpage_worker(file, page, &page_start); 3090 - put_page(page); 3091 - oncethru = 1; 3092 - goto start; 3093 - } else { 3094 - /* we could try using another file handle if there is one - 3095 - but how would we lock it to prevent close of that handle 3096 - racing with this read? In any case 3097 - this will be written out by write_end so is fine */ 3098 - } 3099 - out: 3100 - *pagep = page; 3101 - return rc; 3102 - } 3103 - 3104 - static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 3105 - { 3106 - if (folio_test_private(folio)) 3107 - return 0; 3108 - if (folio_test_fscache(folio)) { 3109 - if (current_is_kswapd() || !(gfp & __GFP_FS)) 3110 - return false; 3111 - folio_wait_fscache(folio); 3112 - } 3113 - fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 3114 - return true; 3115 - } 3116 - 3117 - static void cifs_invalidate_folio(struct folio *folio, size_t offset, 3118 - size_t length) 3119 - { 3120 - folio_wait_fscache(folio); 3121 - } 3122 - 3123 - static int cifs_launder_folio(struct folio *folio) 3124 - { 3125 - int rc = 0; 3126 - loff_t range_start = folio_pos(folio); 3127 - loff_t range_end = range_start + folio_size(folio); 3128 - struct writeback_control wbc = { 3129 - .sync_mode = WB_SYNC_ALL, 3130 - .nr_to_write = 0, 3131 - .range_start = range_start, 3132 - .range_end = range_end, 3133 - }; 3134 - 3135 - cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 3136 - 3137 - if (folio_clear_dirty_for_io(folio)) 3138 - rc = cifs_writepage_locked(&folio->page, &wbc); 3139 - 3140 - folio_wait_fscache(folio); 3141 - return rc; 3142 4897 } 3143 4898 3144 4899 void cifs_oplock_break(struct work_struct *work) ··· 3113 5102 cifs_done_oplock_break(cinode); 3114 5103 } 3115 5104 3116 - /* 3117 - * The presence of cifs_direct_io() in the address space ops vector 3118 - * allowes open() O_DIRECT flags which would have failed otherwise. 
3119 - * 3120 - * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 3121 - * so this method should never be called. 3122 - * 3123 - * Direct IO is not yet supported in the cached mode. 3124 - */ 3125 - static ssize_t 3126 - cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 3127 - { 3128 - /* 3129 - * FIXME 3130 - * Eventually need to support direct IO for non forcedirectio mounts 3131 - */ 3132 - return -EINVAL; 3133 - } 3134 - 3135 5105 static int cifs_swap_activate(struct swap_info_struct *sis, 3136 5106 struct file *swap_file, sector_t *span) 3137 5107 { ··· 3174 5182 } 3175 5183 3176 5184 const struct address_space_operations cifs_addr_ops = { 3177 - .read_folio = cifs_read_folio, 3178 - .readahead = cifs_readahead, 3179 - .writepages = cifs_writepages, 3180 - .write_begin = cifs_write_begin, 3181 - .write_end = cifs_write_end, 3182 - .dirty_folio = netfs_dirty_folio, 3183 - .release_folio = cifs_release_folio, 3184 - .direct_IO = cifs_direct_io, 3185 - .invalidate_folio = cifs_invalidate_folio, 3186 - .launder_folio = cifs_launder_folio, 3187 - .migrate_folio = filemap_migrate_folio, 5185 + .read_folio = netfs_read_folio, 5186 + .readahead = netfs_readahead, 5187 + .writepages = netfs_writepages, 5188 + .dirty_folio = netfs_dirty_folio, 5189 + .release_folio = netfs_release_folio, 5190 + .direct_IO = noop_direct_IO, 5191 + .invalidate_folio = netfs_invalidate_folio, 5192 + .migrate_folio = filemap_migrate_folio, 3188 5193 /* 3189 5194 * TODO: investigate and if useful we could add an is_dirty_writeback 3190 5195 * helper if needed 3191 5196 */ 3192 - .swap_activate = cifs_swap_activate, 5197 + .swap_activate = cifs_swap_activate, 3193 5198 .swap_deactivate = cifs_swap_deactivate, 3194 5199 }; 3195 5200 ··· 3196 5207 * to leave cifs_readahead out of the address space operations. 
3197 5208 */ 3198 5209 const struct address_space_operations cifs_addr_ops_smallbuf = { 3199 - .read_folio = cifs_read_folio, 3200 - .writepages = cifs_writepages, 3201 - .write_begin = cifs_write_begin, 3202 - .write_end = cifs_write_end, 3203 - .dirty_folio = netfs_dirty_folio, 3204 - .release_folio = cifs_release_folio, 3205 - .invalidate_folio = cifs_invalidate_folio, 3206 - .launder_folio = cifs_launder_folio, 3207 - .migrate_folio = filemap_migrate_folio, 5210 + .read_folio = netfs_read_folio, 5211 + .writepages = netfs_writepages, 5212 + .dirty_folio = netfs_dirty_folio, 5213 + .release_folio = netfs_release_folio, 5214 + .invalidate_folio = netfs_invalidate_folio, 5215 + .migrate_folio = filemap_migrate_folio, 3208 5216 };
-109
fs/smb/client/fscache.c
··· 170 170 cifsi->netfs.cache = NULL; 171 171 } 172 172 } 173 - 174 - /* 175 - * Fallback page reading interface. 176 - */ 177 - static int fscache_fallback_read_page(struct inode *inode, struct page *page) 178 - { 179 - struct netfs_cache_resources cres; 180 - struct fscache_cookie *cookie = cifs_inode_cookie(inode); 181 - struct iov_iter iter; 182 - struct bio_vec bvec; 183 - int ret; 184 - 185 - memset(&cres, 0, sizeof(cres)); 186 - bvec_set_page(&bvec, page, PAGE_SIZE, 0); 187 - iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE); 188 - 189 - ret = fscache_begin_read_operation(&cres, cookie); 190 - if (ret < 0) 191 - return ret; 192 - 193 - ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL, 194 - NULL, NULL); 195 - fscache_end_operation(&cres); 196 - return ret; 197 - } 198 - 199 - /* 200 - * Fallback page writing interface. 201 - */ 202 - static int fscache_fallback_write_pages(struct inode *inode, loff_t start, size_t len, 203 - bool no_space_allocated_yet) 204 - { 205 - struct netfs_cache_resources cres; 206 - struct fscache_cookie *cookie = cifs_inode_cookie(inode); 207 - struct iov_iter iter; 208 - int ret; 209 - 210 - memset(&cres, 0, sizeof(cres)); 211 - iov_iter_xarray(&iter, ITER_SOURCE, &inode->i_mapping->i_pages, start, len); 212 - 213 - ret = fscache_begin_write_operation(&cres, cookie); 214 - if (ret < 0) 215 - return ret; 216 - 217 - ret = cres.ops->prepare_write(&cres, &start, &len, len, i_size_read(inode), 218 - no_space_allocated_yet); 219 - if (ret == 0) 220 - ret = fscache_write(&cres, start, &iter, NULL, NULL); 221 - fscache_end_operation(&cres); 222 - return ret; 223 - } 224 - 225 - /* 226 - * Retrieve a page from FS-Cache 227 - */ 228 - int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) 229 - { 230 - int ret; 231 - 232 - cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n", 233 - __func__, cifs_inode_cookie(inode), page, inode); 234 - 235 - ret = fscache_fallback_read_page(inode, page); 236 - if (ret < 
0) 237 - return ret; 238 - 239 - /* Read completed synchronously */ 240 - SetPageUptodate(page); 241 - return 0; 242 - } 243 - 244 - void __cifs_readahead_to_fscache(struct inode *inode, loff_t pos, size_t len) 245 - { 246 - cifs_dbg(FYI, "%s: (fsc: %p, p: %llx, l: %zx, i: %p)\n", 247 - __func__, cifs_inode_cookie(inode), pos, len, inode); 248 - 249 - fscache_fallback_write_pages(inode, pos, len, true); 250 - } 251 - 252 - /* 253 - * Query the cache occupancy. 254 - */ 255 - int __cifs_fscache_query_occupancy(struct inode *inode, 256 - pgoff_t first, unsigned int nr_pages, 257 - pgoff_t *_data_first, 258 - unsigned int *_data_nr_pages) 259 - { 260 - struct netfs_cache_resources cres; 261 - struct fscache_cookie *cookie = cifs_inode_cookie(inode); 262 - loff_t start, data_start; 263 - size_t len, data_len; 264 - int ret; 265 - 266 - ret = fscache_begin_read_operation(&cres, cookie); 267 - if (ret < 0) 268 - return ret; 269 - 270 - start = first * PAGE_SIZE; 271 - len = nr_pages * PAGE_SIZE; 272 - ret = cres.ops->query_occupancy(&cres, start, len, PAGE_SIZE, 273 - &data_start, &data_len); 274 - if (ret == 0) { 275 - *_data_first = data_start / PAGE_SIZE; 276 - *_data_nr_pages = len / PAGE_SIZE; 277 - } 278 - 279 - fscache_end_operation(&cres); 280 - return ret; 281 - }
-54
fs/smb/client/fscache.h
··· 74 74 i_size_read(inode), flags); 75 75 } 76 76 77 - extern int __cifs_fscache_query_occupancy(struct inode *inode, 78 - pgoff_t first, unsigned int nr_pages, 79 - pgoff_t *_data_first, 80 - unsigned int *_data_nr_pages); 81 - 82 - static inline int cifs_fscache_query_occupancy(struct inode *inode, 83 - pgoff_t first, unsigned int nr_pages, 84 - pgoff_t *_data_first, 85 - unsigned int *_data_nr_pages) 86 - { 87 - if (!cifs_inode_cookie(inode)) 88 - return -ENOBUFS; 89 - return __cifs_fscache_query_occupancy(inode, first, nr_pages, 90 - _data_first, _data_nr_pages); 91 - } 92 - 93 - extern int __cifs_readpage_from_fscache(struct inode *pinode, struct page *ppage); 94 - extern void __cifs_readahead_to_fscache(struct inode *pinode, loff_t pos, size_t len); 95 - 96 - 97 - static inline int cifs_readpage_from_fscache(struct inode *inode, 98 - struct page *page) 99 - { 100 - if (cifs_inode_cookie(inode)) 101 - return __cifs_readpage_from_fscache(inode, page); 102 - return -ENOBUFS; 103 - } 104 - 105 - static inline void cifs_readahead_to_fscache(struct inode *inode, 106 - loff_t pos, size_t len) 107 - { 108 - if (cifs_inode_cookie(inode)) 109 - __cifs_readahead_to_fscache(inode, pos, len); 110 - } 111 - 112 77 static inline bool cifs_fscache_enabled(struct inode *inode) 113 78 { 114 79 return fscache_cookie_enabled(cifs_inode_cookie(inode)); ··· 95 130 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } 96 131 static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} 97 132 static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } 98 - 99 - static inline int cifs_fscache_query_occupancy(struct inode *inode, 100 - pgoff_t first, unsigned int nr_pages, 101 - pgoff_t *_data_first, 102 - unsigned int *_data_nr_pages) 103 - { 104 - *_data_first = ULONG_MAX; 105 - *_data_nr_pages = 0; 106 - return -ENOBUFS; 107 - } 108 - 109 - static inline int 110 - cifs_readpage_from_fscache(struct 
inode *inode, struct page *page) 111 - { 112 - return -ENOBUFS; 113 - } 114 - 115 - static inline 116 - void cifs_readahead_to_fscache(struct inode *inode, loff_t pos, size_t len) {} 117 133 118 134 #endif /* CONFIG_CIFS_FSCACHE */ 119 135
+24 -21
fs/smb/client/inode.c
··· 28 28 #include "cached_dir.h" 29 29 #include "reparse.h" 30 30 31 + /* 32 + * Set parameters for the netfs library 33 + */ 34 + static void cifs_set_netfs_context(struct inode *inode) 35 + { 36 + struct cifsInodeInfo *cifs_i = CIFS_I(inode); 37 + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 38 + 39 + netfs_inode_init(&cifs_i->netfs, &cifs_req_ops, true); 40 + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 41 + __set_bit(NETFS_ICTX_WRITETHROUGH, &cifs_i->netfs.flags); 42 + } 43 + 31 44 static void cifs_set_ops(struct inode *inode) 32 45 { 33 46 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 47 + struct netfs_inode *ictx = netfs_inode(inode); 34 48 35 49 switch (inode->i_mode & S_IFMT) { 36 50 case S_IFREG: 37 51 inode->i_op = &cifs_file_inode_ops; 38 52 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 53 + set_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags); 39 54 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 40 55 inode->i_fop = &cifs_file_direct_nobrl_ops; 41 56 else ··· 72 57 inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 73 58 else 74 59 inode->i_data.a_ops = &cifs_addr_ops; 60 + mapping_set_large_folios(inode->i_mapping); 75 61 break; 76 62 case S_IFDIR: 77 63 if (IS_AUTOMOUNT(inode)) { ··· 237 221 238 222 if (fattr->cf_flags & CIFS_FATTR_JUNCTION) 239 223 inode->i_flags |= S_AUTOMOUNT; 240 - if (inode->i_state & I_NEW) 224 + if (inode->i_state & I_NEW) { 225 + cifs_set_netfs_context(inode); 241 226 cifs_set_ops(inode); 227 + } 242 228 return 0; 243 229 } 244 230 ··· 2449 2431 return false; 2450 2432 } 2451 2433 2452 - /* 2453 - * Zap the cache. Called when invalid_mapping flag is set. 
2454 - */ 2455 - int 2456 - cifs_invalidate_mapping(struct inode *inode) 2457 - { 2458 - int rc = 0; 2459 - 2460 - if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 2461 - rc = invalidate_inode_pages2(inode->i_mapping); 2462 - if (rc) 2463 - cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n", 2464 - __func__, inode, rc); 2465 - } 2466 - 2467 - return rc; 2468 - } 2469 - 2470 2434 /** 2471 2435 * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks 2472 2436 * ··· 2485 2485 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) 2486 2486 goto skip_invalidate; 2487 2487 2488 - rc = cifs_invalidate_mapping(inode); 2489 - if (rc) 2488 + rc = filemap_invalidate_inode(inode, true, 0, LLONG_MAX); 2489 + if (rc) { 2490 + cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n", 2491 + __func__, inode, rc); 2490 2492 set_bit(CIFS_INO_INVALID_MAPPING, flags); 2493 + } 2491 2494 } 2492 2495 2493 2496 skip_invalidate:
+5 -5
fs/smb/client/smb2ops.c
··· 217 217 } 218 218 219 219 static int 220 - smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, 221 - unsigned int *num, struct cifs_credits *credits) 220 + smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, 221 + size_t *num, struct cifs_credits *credits) 222 222 { 223 223 int rc = 0; 224 224 unsigned int scredits, in_flight; ··· 4490 4490 unsigned int cur_off; 4491 4491 unsigned int cur_page_idx; 4492 4492 unsigned int pad_len; 4493 - struct cifs_readdata *rdata = mid->callback_data; 4493 + struct cifs_io_subrequest *rdata = mid->callback_data; 4494 4494 struct smb2_hdr *shdr = (struct smb2_hdr *)buf; 4495 4495 int length; 4496 4496 bool use_rdma_mr = false; ··· 4592 4592 4593 4593 /* Copy the data to the output I/O iterator. */ 4594 4594 rdata->result = cifs_copy_pages_to_iter(pages, pages_len, 4595 - cur_off, &rdata->iter); 4595 + cur_off, &rdata->subreq.io_iter); 4596 4596 if (rdata->result != 0) { 4597 4597 if (is_offloaded) 4598 4598 mid->mid_state = MID_RESPONSE_MALFORMED; ··· 4606 4606 /* read response payload is in buf */ 4607 4607 WARN_ONCE(pages && !xa_empty(pages), 4608 4608 "read data can be either in buf or in pages"); 4609 - length = copy_to_iter(buf + data_offset, data_len, &rdata->iter); 4609 + length = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter); 4610 4610 if (length < 0) 4611 4611 return length; 4612 4612 rdata->got_bytes = data_len;
+105 -81
fs/smb/client/smb2pdu.c
··· 23 23 #include <linux/uuid.h> 24 24 #include <linux/pagemap.h> 25 25 #include <linux/xattr.h> 26 + #include <linux/netfs.h> 27 + #include <trace/events/netfs.h> 26 28 #include "cifsglob.h" 27 29 #include "cifsacl.h" 28 30 #include "cifsproto.h" ··· 4393 4391 */ 4394 4392 static int 4395 4393 smb2_new_read_req(void **buf, unsigned int *total_len, 4396 - struct cifs_io_parms *io_parms, struct cifs_readdata *rdata, 4394 + struct cifs_io_parms *io_parms, struct cifs_io_subrequest *rdata, 4397 4395 unsigned int remaining_bytes, int request_type) 4398 4396 { 4399 4397 int rc = -EACCES; ··· 4421 4419 req->Length = cpu_to_le32(io_parms->length); 4422 4420 req->Offset = cpu_to_le64(io_parms->offset); 4423 4421 4424 - trace_smb3_read_enter(0 /* xid */, 4425 - io_parms->persistent_fid, 4426 - io_parms->tcon->tid, io_parms->tcon->ses->Suid, 4427 - io_parms->offset, io_parms->length); 4422 + trace_smb3_read_enter(rdata ? rdata->rreq->debug_id : 0, 4423 + rdata ? rdata->subreq.debug_index : 0, 4424 + rdata ? 
rdata->xid : 0, 4425 + io_parms->persistent_fid, 4426 + io_parms->tcon->tid, io_parms->tcon->ses->Suid, 4427 + io_parms->offset, io_parms->length); 4428 4428 #ifdef CONFIG_CIFS_SMB_DIRECT 4429 4429 /* 4430 4430 * If we want to do a RDMA write, fill in and append ··· 4436 4432 struct smbd_buffer_descriptor_v1 *v1; 4437 4433 bool need_invalidate = server->dialect == SMB30_PROT_ID; 4438 4434 4439 - rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->iter, 4435 + rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->subreq.io_iter, 4440 4436 true, need_invalidate); 4441 4437 if (!rdata->mr) 4442 4438 return -EAGAIN; ··· 4487 4483 static void 4488 4484 smb2_readv_callback(struct mid_q_entry *mid) 4489 4485 { 4490 - struct cifs_readdata *rdata = mid->callback_data; 4491 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 4486 + struct cifs_io_subrequest *rdata = mid->callback_data; 4487 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 4492 4488 struct TCP_Server_Info *server = rdata->server; 4493 4489 struct smb2_hdr *shdr = 4494 4490 (struct smb2_hdr *)rdata->iov[0].iov_base; ··· 4496 4492 struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], .rq_nvec = 1 }; 4497 4493 4498 4494 if (rdata->got_bytes) { 4499 - rqst.rq_iter = rdata->iter; 4500 - rqst.rq_iter_size = iov_iter_count(&rdata->iter); 4495 + rqst.rq_iter = rdata->subreq.io_iter; 4496 + rqst.rq_iter_size = iov_iter_count(&rdata->subreq.io_iter); 4501 4497 } 4502 4498 4503 4499 WARN_ONCE(rdata->server != mid->server, 4504 4500 "rdata server %p != mid server %p", 4505 4501 rdata->server, mid->server); 4506 4502 4507 - cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", 4503 + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", 4508 4504 __func__, mid->mid, mid->mid_state, rdata->result, 4509 - rdata->bytes); 4505 + rdata->subreq.len); 4510 4506 4511 4507 switch (mid->mid_state) { 4512 4508 case MID_RESPONSE_RECEIVED: ··· 4516 4512 if (server->sign && !mid->decrypted) { 
4517 4513 int rc; 4518 4514 4519 - iov_iter_revert(&rqst.rq_iter, rdata->got_bytes); 4520 4515 iov_iter_truncate(&rqst.rq_iter, rdata->got_bytes); 4521 4516 rc = smb2_verify_signature(&rqst, server); 4522 4517 if (rc) ··· 4556 4553 #endif 4557 4554 if (rdata->result && rdata->result != -ENODATA) { 4558 4555 cifs_stats_fail_inc(tcon, SMB2_READ_HE); 4559 - trace_smb3_read_err(0 /* xid */, 4560 - rdata->cfile->fid.persistent_fid, 4561 - tcon->tid, tcon->ses->Suid, rdata->offset, 4562 - rdata->bytes, rdata->result); 4556 + trace_smb3_read_err(rdata->rreq->debug_id, 4557 + rdata->subreq.debug_index, 4558 + rdata->xid, 4559 + rdata->req->cfile->fid.persistent_fid, 4560 + tcon->tid, tcon->ses->Suid, rdata->subreq.start, 4561 + rdata->subreq.len, rdata->result); 4563 4562 } else 4564 - trace_smb3_read_done(0 /* xid */, 4565 - rdata->cfile->fid.persistent_fid, 4563 + trace_smb3_read_done(rdata->rreq->debug_id, 4564 + rdata->subreq.debug_index, 4565 + rdata->xid, 4566 + rdata->req->cfile->fid.persistent_fid, 4566 4567 tcon->tid, tcon->ses->Suid, 4567 - rdata->offset, rdata->got_bytes); 4568 + rdata->subreq.start, rdata->got_bytes); 4568 4569 4569 - queue_work(cifsiod_wq, &rdata->work); 4570 + if (rdata->result == -ENODATA) { 4571 + /* We may have got an EOF error because fallocate 4572 + * failed to enlarge the file. 4573 + */ 4574 + if (rdata->subreq.start < rdata->subreq.rreq->i_size) 4575 + rdata->result = 0; 4576 + } 4577 + if (rdata->result == 0 || rdata->result == -EAGAIN) 4578 + iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes); 4579 + rdata->credits.value = 0; 4580 + netfs_subreq_terminated(&rdata->subreq, 4581 + (rdata->result == 0 || rdata->result == -EAGAIN) ? 
4582 + rdata->got_bytes : rdata->result, true); 4570 4583 release_mid(mid); 4571 4584 add_credits(server, &credits, 0); 4572 4585 } 4573 4586 4574 4587 /* smb2_async_readv - send an async read, and set up mid to handle result */ 4575 4588 int 4576 - smb2_async_readv(struct cifs_readdata *rdata) 4589 + smb2_async_readv(struct cifs_io_subrequest *rdata) 4577 4590 { 4578 4591 int rc, flags = 0; 4579 4592 char *buf; ··· 4598 4579 struct smb_rqst rqst = { .rq_iov = rdata->iov, 4599 4580 .rq_nvec = 1 }; 4600 4581 struct TCP_Server_Info *server; 4601 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 4582 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 4602 4583 unsigned int total_len; 4603 4584 int credit_request; 4604 4585 4605 - cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", 4606 - __func__, rdata->offset, rdata->bytes); 4586 + cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n", 4587 + __func__, rdata->subreq.start, rdata->subreq.len); 4607 4588 4608 4589 if (!rdata->server) 4609 4590 rdata->server = cifs_pick_channel(tcon->ses); 4610 4591 4611 - io_parms.tcon = tlink_tcon(rdata->cfile->tlink); 4592 + io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink); 4612 4593 io_parms.server = server = rdata->server; 4613 - io_parms.offset = rdata->offset; 4614 - io_parms.length = rdata->bytes; 4615 - io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; 4616 - io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; 4594 + io_parms.offset = rdata->subreq.start; 4595 + io_parms.length = rdata->subreq.len; 4596 + io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; 4597 + io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; 4617 4598 io_parms.pid = rdata->pid; 4618 4599 4619 4600 rc = smb2_new_read_req( ··· 4630 4611 shdr = (struct smb2_hdr *)buf; 4631 4612 4632 4613 if (rdata->credits.value > 0) { 4633 - shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, 4614 + shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->subreq.len, 4634 
4615 SMB2_MAX_BUFFER_SIZE)); 4635 4616 credit_request = le16_to_cpu(shdr->CreditCharge) + 8; 4636 4617 if (server->credits >= server->max_credits) ··· 4640 4621 min_t(int, server->max_credits - 4641 4622 server->credits, credit_request)); 4642 4623 4643 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4624 + rc = adjust_credits(server, &rdata->credits, rdata->subreq.len); 4644 4625 if (rc) 4645 4626 goto async_readv_out; 4646 4627 4647 4628 flags |= CIFS_HAS_CREDITS; 4648 4629 } 4649 4630 4650 - kref_get(&rdata->refcount); 4651 4631 rc = cifs_call_async(server, &rqst, 4652 4632 cifs_readv_receive, smb2_readv_callback, 4653 4633 smb3_handle_read_data, rdata, flags, 4654 4634 &rdata->credits); 4655 4635 if (rc) { 4656 - kref_put(&rdata->refcount, cifs_readdata_release); 4657 4636 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); 4658 - trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid, 4637 + trace_smb3_read_err(rdata->rreq->debug_id, 4638 + rdata->subreq.debug_index, 4639 + rdata->xid, io_parms.persistent_fid, 4659 4640 io_parms.tcon->tid, 4660 4641 io_parms.tcon->ses->Suid, 4661 4642 io_parms.offset, io_parms.length, rc); ··· 4706 4687 if (rc != -ENODATA) { 4707 4688 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); 4708 4689 cifs_dbg(VFS, "Send error in read = %d\n", rc); 4709 - trace_smb3_read_err(xid, 4690 + trace_smb3_read_err(0, 0, xid, 4710 4691 req->PersistentFileId, 4711 4692 io_parms->tcon->tid, ses->Suid, 4712 4693 io_parms->offset, io_parms->length, 4713 4694 rc); 4714 4695 } else 4715 - trace_smb3_read_done(xid, req->PersistentFileId, io_parms->tcon->tid, 4696 + trace_smb3_read_done(0, 0, xid, 4697 + req->PersistentFileId, io_parms->tcon->tid, 4716 4698 ses->Suid, io_parms->offset, 0); 4717 4699 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 4718 4700 cifs_small_buf_release(req); 4719 4701 return rc == -ENODATA ? 
0 : rc; 4720 4702 } else 4721 - trace_smb3_read_done(xid, 4722 - req->PersistentFileId, 4723 - io_parms->tcon->tid, ses->Suid, 4724 - io_parms->offset, io_parms->length); 4703 + trace_smb3_read_done(0, 0, xid, 4704 + req->PersistentFileId, 4705 + io_parms->tcon->tid, ses->Suid, 4706 + io_parms->offset, io_parms->length); 4725 4707 4726 4708 cifs_small_buf_release(req); 4727 4709 ··· 4755 4735 static void 4756 4736 smb2_writev_callback(struct mid_q_entry *mid) 4757 4737 { 4758 - struct cifs_writedata *wdata = mid->callback_data; 4759 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 4738 + struct cifs_io_subrequest *wdata = mid->callback_data; 4739 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 4760 4740 struct TCP_Server_Info *server = wdata->server; 4761 - unsigned int written; 4762 4741 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; 4763 4742 struct cifs_credits credits = { .value = 0, .instance = 0 }; 4743 + ssize_t result = 0; 4744 + size_t written; 4764 4745 4765 4746 WARN_ONCE(wdata->server != mid->server, 4766 4747 "wdata server %p != mid server %p", ··· 4771 4750 case MID_RESPONSE_RECEIVED: 4772 4751 credits.value = le16_to_cpu(rsp->hdr.CreditRequest); 4773 4752 credits.instance = server->reconnect_instance; 4774 - wdata->result = smb2_check_receive(mid, server, 0); 4775 - if (wdata->result != 0) 4753 + result = smb2_check_receive(mid, server, 0); 4754 + if (result != 0) 4776 4755 break; 4777 4756 4778 4757 written = le32_to_cpu(rsp->DataLength); ··· 4782 4761 * client. OS/2 servers are known to set incorrect 4783 4762 * CountHigh values. 
4784 4763 */ 4785 - if (written > wdata->bytes) 4764 + if (written > wdata->subreq.len) 4786 4765 written &= 0xFFFF; 4787 4766 4788 - if (written < wdata->bytes) 4767 + if (written < wdata->subreq.len) 4789 4768 wdata->result = -ENOSPC; 4790 4769 else 4791 - wdata->bytes = written; 4770 + wdata->subreq.len = written; 4771 + iov_iter_advance(&wdata->subreq.io_iter, written); 4792 4772 break; 4793 4773 case MID_REQUEST_SUBMITTED: 4794 4774 case MID_RETRY_NEEDED: 4795 - wdata->result = -EAGAIN; 4775 + result = -EAGAIN; 4796 4776 break; 4797 4777 case MID_RESPONSE_MALFORMED: 4798 4778 credits.value = le16_to_cpu(rsp->hdr.CreditRequest); 4799 4779 credits.instance = server->reconnect_instance; 4800 4780 fallthrough; 4801 4781 default: 4802 - wdata->result = -EIO; 4782 + result = -EIO; 4803 4783 break; 4804 4784 } 4805 4785 #ifdef CONFIG_CIFS_SMB_DIRECT ··· 4816 4794 wdata->mr = NULL; 4817 4795 } 4818 4796 #endif 4819 - if (wdata->result) { 4797 + if (result) { 4820 4798 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 4821 - trace_smb3_write_err(0 /* no xid */, 4822 - wdata->cfile->fid.persistent_fid, 4823 - tcon->tid, tcon->ses->Suid, wdata->offset, 4824 - wdata->bytes, wdata->result); 4799 + trace_smb3_write_err(wdata->xid, 4800 + wdata->req->cfile->fid.persistent_fid, 4801 + tcon->tid, tcon->ses->Suid, wdata->subreq.start, 4802 + wdata->subreq.len, wdata->result); 4825 4803 if (wdata->result == -ENOSPC) 4826 4804 pr_warn_once("Out of space writing to %s\n", 4827 4805 tcon->tree_name); 4828 4806 } else 4829 4807 trace_smb3_write_done(0 /* no xid */, 4830 - wdata->cfile->fid.persistent_fid, 4808 + wdata->req->cfile->fid.persistent_fid, 4831 4809 tcon->tid, tcon->ses->Suid, 4832 - wdata->offset, wdata->bytes); 4810 + wdata->subreq.start, wdata->subreq.len); 4833 4811 4834 - queue_work(cifsiod_wq, &wdata->work); 4812 + wdata->credits.value = 0; 4813 + cifs_write_subrequest_terminated(wdata, result ?: written, true); 4835 4814 release_mid(mid); 4836 4815 add_credits(server, 
&credits, 0); 4837 4816 } 4838 4817 4839 4818 /* smb2_async_writev - send an async write, and set up mid to handle result */ 4840 - int 4841 - smb2_async_writev(struct cifs_writedata *wdata, 4842 - void (*release)(struct kref *kref)) 4819 + void 4820 + smb2_async_writev(struct cifs_io_subrequest *wdata) 4843 4821 { 4844 4822 int rc = -EACCES, flags = 0; 4845 4823 struct smb2_write_req *req = NULL; 4846 4824 struct smb2_hdr *shdr; 4847 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 4825 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 4848 4826 struct TCP_Server_Info *server = wdata->server; 4849 4827 struct kvec iov[1]; 4850 4828 struct smb_rqst rqst = { }; 4851 - unsigned int total_len; 4829 + unsigned int total_len, xid = wdata->xid; 4852 4830 struct cifs_io_parms _io_parms; 4853 4831 struct cifs_io_parms *io_parms = NULL; 4854 4832 int credit_request; 4855 4833 4856 - if (!wdata->server || wdata->replay) 4834 + if (!wdata->server || test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) 4857 4835 server = wdata->server = cifs_pick_channel(tcon->ses); 4858 4836 4859 4837 /* ··· 4863 4841 _io_parms = (struct cifs_io_parms) { 4864 4842 .tcon = tcon, 4865 4843 .server = server, 4866 - .offset = wdata->offset, 4867 - .length = wdata->bytes, 4868 - .persistent_fid = wdata->cfile->fid.persistent_fid, 4869 - .volatile_fid = wdata->cfile->fid.volatile_fid, 4844 + .offset = wdata->subreq.start, 4845 + .length = wdata->subreq.len, 4846 + .persistent_fid = wdata->req->cfile->fid.persistent_fid, 4847 + .volatile_fid = wdata->req->cfile->fid.volatile_fid, 4870 4848 .pid = wdata->pid, 4871 4849 }; 4872 4850 io_parms = &_io_parms; ··· 4874 4852 rc = smb2_plain_req_init(SMB2_WRITE, tcon, server, 4875 4853 (void **) &req, &total_len); 4876 4854 if (rc) 4877 - return rc; 4855 + goto out; 4878 4856 4879 4857 if (smb3_encryption_required(tcon)) 4880 4858 flags |= CIFS_TRANSFORM_REQ; ··· 4892 4870 offsetof(struct smb2_write_req, Buffer)); 4893 4871 
req->RemainingBytes = 0; 4894 4872 4895 - trace_smb3_write_enter(0 /* xid */, 4873 + trace_smb3_write_enter(wdata->xid, 4896 4874 io_parms->persistent_fid, 4897 4875 io_parms->tcon->tid, 4898 4876 io_parms->tcon->ses->Suid, ··· 4906 4884 */ 4907 4885 if (smb3_use_rdma_offload(io_parms)) { 4908 4886 struct smbd_buffer_descriptor_v1 *v1; 4909 - size_t data_size = iov_iter_count(&wdata->iter); 4887 + size_t data_size = iov_iter_count(&wdata->subreq.io_iter); 4910 4888 bool need_invalidate = server->dialect == SMB30_PROT_ID; 4911 4889 4912 - wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->iter, 4890 + wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter, 4913 4891 false, need_invalidate); 4914 4892 if (!wdata->mr) { 4915 4893 rc = -EAGAIN; ··· 4936 4914 4937 4915 rqst.rq_iov = iov; 4938 4916 rqst.rq_nvec = 1; 4939 - rqst.rq_iter = wdata->iter; 4917 + rqst.rq_iter = wdata->subreq.io_iter; 4940 4918 rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter); 4941 - if (wdata->replay) 4919 + if (test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) 4942 4920 smb2_set_replay(server, &rqst); 4943 4921 #ifdef CONFIG_CIFS_SMB_DIRECT 4944 4922 if (wdata->mr) ··· 4956 4934 #endif 4957 4935 4958 4936 if (wdata->credits.value > 0) { 4959 - shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, 4937 + shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->subreq.len, 4960 4938 SMB2_MAX_BUFFER_SIZE)); 4961 4939 credit_request = le16_to_cpu(shdr->CreditCharge) + 8; 4962 4940 if (server->credits >= server->max_credits) ··· 4973 4951 flags |= CIFS_HAS_CREDITS; 4974 4952 } 4975 4953 4976 - kref_get(&wdata->refcount); 4977 4954 rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, NULL, 4978 4955 wdata, flags, &wdata->credits); 4979 - 4956 + /* Can't touch wdata if rc == 0 */ 4980 4957 if (rc) { 4981 - trace_smb3_write_err(0 /* no xid */, 4958 + trace_smb3_write_err(xid, 4982 4959 io_parms->persistent_fid, 4983 4960 io_parms->tcon->tid, 4984 4961 
io_parms->tcon->ses->Suid, 4985 4962 io_parms->offset, 4986 4963 io_parms->length, 4987 4964 rc); 4988 - kref_put(&wdata->refcount, release); 4989 4965 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 4990 4966 } 4991 4967 4992 4968 async_writev_out: 4993 4969 cifs_small_buf_release(req); 4994 - return rc; 4970 + out: 4971 + if (rc) { 4972 + add_credits_and_wake_if(wdata->server, &wdata->credits, 0); 4973 + cifs_write_subrequest_terminated(wdata, rc, true); 4974 + } 4995 4975 } 4996 4976 4997 4977 /*
+2 -3
fs/smb/client/smb2proto.h
··· 210 210 extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, 211 211 u64 persistent_fid, u64 volatile_fid, 212 212 __le64 *uniqueid); 213 - extern int smb2_async_readv(struct cifs_readdata *rdata); 213 + extern int smb2_async_readv(struct cifs_io_subrequest *rdata); 214 214 extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, 215 215 unsigned int *nbytes, char **buf, int *buf_type); 216 - extern int smb2_async_writev(struct cifs_writedata *wdata, 217 - void (*release)(struct kref *kref)); 216 + extern void smb2_async_writev(struct cifs_io_subrequest *wdata); 218 217 extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, 219 218 unsigned int *nbytes, struct kvec *iov, int n_vec); 220 219 extern int SMB2_echo(struct TCP_Server_Info *server);
+125 -19
fs/smb/client/trace.h
··· 85 85 86 86 /* For logging errors in read or write */ 87 87 DECLARE_EVENT_CLASS(smb3_rw_err_class, 88 + TP_PROTO(unsigned int rreq_debug_id, 89 + unsigned int rreq_debug_index, 90 + unsigned int xid, 91 + __u64 fid, 92 + __u32 tid, 93 + __u64 sesid, 94 + __u64 offset, 95 + __u32 len, 96 + int rc), 97 + TP_ARGS(rreq_debug_id, rreq_debug_index, 98 + xid, fid, tid, sesid, offset, len, rc), 99 + TP_STRUCT__entry( 100 + __field(unsigned int, rreq_debug_id) 101 + __field(unsigned int, rreq_debug_index) 102 + __field(unsigned int, xid) 103 + __field(__u64, fid) 104 + __field(__u32, tid) 105 + __field(__u64, sesid) 106 + __field(__u64, offset) 107 + __field(__u32, len) 108 + __field(int, rc) 109 + ), 110 + TP_fast_assign( 111 + __entry->rreq_debug_id = rreq_debug_id; 112 + __entry->rreq_debug_index = rreq_debug_index; 113 + __entry->xid = xid; 114 + __entry->fid = fid; 115 + __entry->tid = tid; 116 + __entry->sesid = sesid; 117 + __entry->offset = offset; 118 + __entry->len = len; 119 + __entry->rc = rc; 120 + ), 121 + TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", 122 + __entry->rreq_debug_id, __entry->rreq_debug_index, 123 + __entry->xid, __entry->sesid, __entry->tid, __entry->fid, 124 + __entry->offset, __entry->len, __entry->rc) 125 + ) 126 + 127 + #define DEFINE_SMB3_RW_ERR_EVENT(name) \ 128 + DEFINE_EVENT(smb3_rw_err_class, smb3_##name, \ 129 + TP_PROTO(unsigned int rreq_debug_id, \ 130 + unsigned int rreq_debug_index, \ 131 + unsigned int xid, \ 132 + __u64 fid, \ 133 + __u32 tid, \ 134 + __u64 sesid, \ 135 + __u64 offset, \ 136 + __u32 len, \ 137 + int rc), \ 138 + TP_ARGS(rreq_debug_id, rreq_debug_index, xid, fid, tid, sesid, offset, len, rc)) 139 + 140 + DEFINE_SMB3_RW_ERR_EVENT(read_err); 141 + 142 + /* For logging errors in other file I/O ops */ 143 + DECLARE_EVENT_CLASS(smb3_other_err_class, 88 144 TP_PROTO(unsigned int xid, 89 145 __u64 fid, 90 146 __u32 tid, ··· 172 116 __entry->offset, __entry->len, 
__entry->rc) 173 117 ) 174 118 175 - #define DEFINE_SMB3_RW_ERR_EVENT(name) \ 176 - DEFINE_EVENT(smb3_rw_err_class, smb3_##name, \ 119 + #define DEFINE_SMB3_OTHER_ERR_EVENT(name) \ 120 + DEFINE_EVENT(smb3_other_err_class, smb3_##name, \ 177 121 TP_PROTO(unsigned int xid, \ 178 122 __u64 fid, \ 179 123 __u32 tid, \ ··· 183 127 int rc), \ 184 128 TP_ARGS(xid, fid, tid, sesid, offset, len, rc)) 185 129 186 - DEFINE_SMB3_RW_ERR_EVENT(write_err); 187 - DEFINE_SMB3_RW_ERR_EVENT(read_err); 188 - DEFINE_SMB3_RW_ERR_EVENT(query_dir_err); 189 - DEFINE_SMB3_RW_ERR_EVENT(zero_err); 190 - DEFINE_SMB3_RW_ERR_EVENT(falloc_err); 130 + DEFINE_SMB3_OTHER_ERR_EVENT(write_err); 131 + DEFINE_SMB3_OTHER_ERR_EVENT(query_dir_err); 132 + DEFINE_SMB3_OTHER_ERR_EVENT(zero_err); 133 + DEFINE_SMB3_OTHER_ERR_EVENT(falloc_err); 191 134 192 135 193 136 /* For logging successful read or write */ 194 137 DECLARE_EVENT_CLASS(smb3_rw_done_class, 138 + TP_PROTO(unsigned int rreq_debug_id, 139 + unsigned int rreq_debug_index, 140 + unsigned int xid, 141 + __u64 fid, 142 + __u32 tid, 143 + __u64 sesid, 144 + __u64 offset, 145 + __u32 len), 146 + TP_ARGS(rreq_debug_id, rreq_debug_index, 147 + xid, fid, tid, sesid, offset, len), 148 + TP_STRUCT__entry( 149 + __field(unsigned int, rreq_debug_id) 150 + __field(unsigned int, rreq_debug_index) 151 + __field(unsigned int, xid) 152 + __field(__u64, fid) 153 + __field(__u32, tid) 154 + __field(__u64, sesid) 155 + __field(__u64, offset) 156 + __field(__u32, len) 157 + ), 158 + TP_fast_assign( 159 + __entry->rreq_debug_id = rreq_debug_id; 160 + __entry->rreq_debug_index = rreq_debug_index; 161 + __entry->xid = xid; 162 + __entry->fid = fid; 163 + __entry->tid = tid; 164 + __entry->sesid = sesid; 165 + __entry->offset = offset; 166 + __entry->len = len; 167 + ), 168 + TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x", 169 + __entry->rreq_debug_id, __entry->rreq_debug_index, 170 + __entry->xid, __entry->sesid, __entry->tid, 
__entry->fid, 171 + __entry->offset, __entry->len) 172 + ) 173 + 174 + #define DEFINE_SMB3_RW_DONE_EVENT(name) \ 175 + DEFINE_EVENT(smb3_rw_done_class, smb3_##name, \ 176 + TP_PROTO(unsigned int rreq_debug_id, \ 177 + unsigned int rreq_debug_index, \ 178 + unsigned int xid, \ 179 + __u64 fid, \ 180 + __u32 tid, \ 181 + __u64 sesid, \ 182 + __u64 offset, \ 183 + __u32 len), \ 184 + TP_ARGS(rreq_debug_id, rreq_debug_index, xid, fid, tid, sesid, offset, len)) 185 + 186 + DEFINE_SMB3_RW_DONE_EVENT(read_enter); 187 + DEFINE_SMB3_RW_DONE_EVENT(read_done); 188 + 189 + /* For logging successful other op */ 190 + DECLARE_EVENT_CLASS(smb3_other_done_class, 195 191 TP_PROTO(unsigned int xid, 196 192 __u64 fid, 197 193 __u32 tid, ··· 272 164 __entry->offset, __entry->len) 273 165 ) 274 166 275 - #define DEFINE_SMB3_RW_DONE_EVENT(name) \ 276 - DEFINE_EVENT(smb3_rw_done_class, smb3_##name, \ 167 + #define DEFINE_SMB3_OTHER_DONE_EVENT(name) \ 168 + DEFINE_EVENT(smb3_other_done_class, smb3_##name, \ 277 169 TP_PROTO(unsigned int xid, \ 278 170 __u64 fid, \ 279 171 __u32 tid, \ ··· 282 174 __u32 len), \ 283 175 TP_ARGS(xid, fid, tid, sesid, offset, len)) 284 176 285 - DEFINE_SMB3_RW_DONE_EVENT(write_enter); 286 - DEFINE_SMB3_RW_DONE_EVENT(read_enter); 287 - DEFINE_SMB3_RW_DONE_EVENT(query_dir_enter); 288 - DEFINE_SMB3_RW_DONE_EVENT(zero_enter); 289 - DEFINE_SMB3_RW_DONE_EVENT(falloc_enter); 290 - DEFINE_SMB3_RW_DONE_EVENT(write_done); 291 - DEFINE_SMB3_RW_DONE_EVENT(read_done); 292 - DEFINE_SMB3_RW_DONE_EVENT(query_dir_done); 293 - DEFINE_SMB3_RW_DONE_EVENT(zero_done); 294 - DEFINE_SMB3_RW_DONE_EVENT(falloc_done); 177 + DEFINE_SMB3_OTHER_DONE_EVENT(write_enter); 178 + DEFINE_SMB3_OTHER_DONE_EVENT(query_dir_enter); 179 + DEFINE_SMB3_OTHER_DONE_EVENT(zero_enter); 180 + DEFINE_SMB3_OTHER_DONE_EVENT(falloc_enter); 181 + DEFINE_SMB3_OTHER_DONE_EVENT(write_done); 182 + DEFINE_SMB3_OTHER_DONE_EVENT(query_dir_done); 183 + DEFINE_SMB3_OTHER_DONE_EVENT(zero_done); 184 + 
DEFINE_SMB3_OTHER_DONE_EVENT(falloc_done); 295 185 296 186 /* For logging successful set EOF (truncate) */ 297 187 DECLARE_EVENT_CLASS(smb3_eof_class,
+10 -7
fs/smb/client/transport.c
··· 691 691 } 692 692 693 693 int 694 - cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, 695 - unsigned int *num, struct cifs_credits *credits) 694 + cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, 695 + size_t *num, struct cifs_credits *credits) 696 696 { 697 697 *num = size; 698 698 credits->value = 0; ··· 1692 1692 static int 1693 1693 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1694 1694 { 1695 - struct cifs_readdata *rdata = mid->callback_data; 1695 + struct cifs_io_subrequest *rdata = mid->callback_data; 1696 1696 1697 1697 return __cifs_readv_discard(server, mid, rdata->result); 1698 1698 } ··· 1702 1702 { 1703 1703 int length, len; 1704 1704 unsigned int data_offset, data_len; 1705 - struct cifs_readdata *rdata = mid->callback_data; 1705 + struct cifs_io_subrequest *rdata = mid->callback_data; 1706 1706 char *buf = server->smallbuf; 1707 1707 unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server); 1708 1708 bool use_rdma_mr = false; 1709 1709 1710 - cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", 1711 - __func__, mid->mid, rdata->offset, rdata->bytes); 1710 + cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n", 1711 + __func__, mid->mid, rdata->subreq.start, rdata->subreq.len); 1712 1712 1713 1713 /* 1714 1714 * read the rest of READ_RSP header (sans Data array), or whatever we ··· 1813 1813 length = data_len; /* An RDMA read is already done. */ 1814 1814 else 1815 1815 #endif 1816 - length = cifs_read_iter_from_socket(server, &rdata->iter, 1816 + { 1817 + length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter, 1817 1818 data_len); 1819 + iov_iter_revert(&rdata->subreq.io_iter, data_len); 1820 + } 1818 1821 if (length > 0) 1819 1822 rdata->got_bytes += length; 1820 1823 server->total_read += length;
+14 -8
include/linux/fscache.h
··· 172 172 extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); 173 173 extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); 174 174 175 - extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, 176 - loff_t, size_t, loff_t, netfs_io_terminated_t, void *, 177 - bool); 175 + void __fscache_write_to_cache(struct fscache_cookie *cookie, 176 + struct address_space *mapping, 177 + loff_t start, size_t len, loff_t i_size, 178 + netfs_io_terminated_t term_func, 179 + void *term_func_priv, 180 + bool using_pgpriv2, bool cond); 178 181 extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); 179 182 180 183 /** ··· 600 597 * @i_size: The new size of the inode 601 598 * @term_func: The function to call upon completion 602 599 * @term_func_priv: The private data for @term_func 603 - * @caching: If PG_fscache has been set 600 + * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write 601 + * @caching: If we actually want to do the caching 604 602 * 605 603 * Helper function for a netfs to write dirty data from an inode into the cache 606 604 * object that's backing it. ··· 612 608 * marked with PG_fscache. 613 609 * 614 610 * If given, @term_func will be called upon completion and supplied with 615 - * @term_func_priv. Note that the PG_fscache flags will have been cleared by 616 - * this point, so the netfs must retain its own pin on the mapping. 611 + * @term_func_priv. Note that if @using_pgpriv2 is set, the PG_private_2 flags 612 + * will have been cleared by this point, so the netfs must retain its own pin 613 + * on the mapping. 
617 614 */ 618 615 static inline void fscache_write_to_cache(struct fscache_cookie *cookie, 619 616 struct address_space *mapping, 620 617 loff_t start, size_t len, loff_t i_size, 621 618 netfs_io_terminated_t term_func, 622 619 void *term_func_priv, 623 - bool caching) 620 + bool using_pgpriv2, bool caching) 624 621 { 625 622 if (caching) 626 623 __fscache_write_to_cache(cookie, mapping, start, len, i_size, 627 - term_func, term_func_priv, caching); 624 + term_func, term_func_priv, 625 + using_pgpriv2, caching); 628 626 else if (term_func) 629 627 term_func(term_func_priv, -ENOBUFS, false); 630 628
+102 -99
include/linux/netfs.h
··· 20 20 #include <linux/uio.h> 21 21 22 22 enum netfs_sreq_ref_trace; 23 - 24 - /* 25 - * Overload PG_private_2 to give us PG_fscache - this is used to indicate that 26 - * a page is currently backed by a local disk cache 27 - */ 28 - #define folio_test_fscache(folio) folio_test_private_2(folio) 29 - #define PageFsCache(page) PagePrivate2((page)) 30 - #define SetPageFsCache(page) SetPagePrivate2((page)) 31 - #define ClearPageFsCache(page) ClearPagePrivate2((page)) 32 - #define TestSetPageFsCache(page) TestSetPagePrivate2((page)) 33 - #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) 23 + typedef struct mempool_s mempool_t; 34 24 35 25 /** 36 - * folio_start_fscache - Start an fscache write on a folio. 26 + * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] 37 27 * @folio: The folio. 38 28 * 39 29 * Call this function before writing a folio to a local cache. Starting a 40 30 * second write before the first one finishes is not allowed. 31 + * 32 + * Note that this should no longer be used. 41 33 */ 42 - static inline void folio_start_fscache(struct folio *folio) 34 + static inline void folio_start_private_2(struct folio *folio) 43 35 { 44 36 VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); 45 37 folio_get(folio); 46 38 folio_set_private_2(folio); 47 - } 48 - 49 - /** 50 - * folio_end_fscache - End an fscache write on a folio. 51 - * @folio: The folio. 52 - * 53 - * Call this function after the folio has been written to the local cache. 54 - * This will wake any sleepers waiting on this folio. 55 - */ 56 - static inline void folio_end_fscache(struct folio *folio) 57 - { 58 - folio_end_private_2(folio); 59 - } 60 - 61 - /** 62 - * folio_wait_fscache - Wait for an fscache write on this folio to end. 63 - * @folio: The folio. 64 - * 65 - * If this folio is currently being written to a local cache, wait for 66 - * the write to finish. Another write may start after this one finishes, 67 - * unless the caller holds the folio lock. 
68 - */ 69 - static inline void folio_wait_fscache(struct folio *folio) 70 - { 71 - folio_wait_private_2(folio); 72 - } 73 - 74 - /** 75 - * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. 76 - * @folio: The folio. 77 - * 78 - * If this folio is currently being written to a local cache, wait 79 - * for the write to finish or for a fatal signal to be received. 80 - * Another write may start after this one finishes, unless the caller 81 - * holds the folio lock. 82 - * 83 - * Return: 84 - * - 0 if successful. 85 - * - -EINTR if a fatal signal was encountered. 86 - */ 87 - static inline int folio_wait_fscache_killable(struct folio *folio) 88 - { 89 - return folio_wait_private_2_killable(folio); 90 - } 91 - 92 - static inline void set_page_fscache(struct page *page) 93 - { 94 - folio_start_fscache(page_folio(page)); 95 - } 96 - 97 - static inline void end_page_fscache(struct page *page) 98 - { 99 - folio_end_private_2(page_folio(page)); 100 - } 101 - 102 - static inline void wait_on_page_fscache(struct page *page) 103 - { 104 - folio_wait_private_2(page_folio(page)); 105 - } 106 - 107 - static inline int wait_on_page_fscache_killable(struct page *page) 108 - { 109 - return folio_wait_private_2_killable(page_folio(page)); 110 39 } 111 40 112 41 /* Marks used on xarray-based buffers */ ··· 64 135 #if IS_ENABLED(CONFIG_FSCACHE) 65 136 struct fscache_cookie *cache; 66 137 #endif 138 + struct mutex wb_lock; /* Writeback serialisation */ 67 139 loff_t remote_i_size; /* Size of the remote file */ 68 140 loff_t zero_point; /* Size after which we assume there's no data 69 141 * on the server */ ··· 72 142 #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ 73 143 #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ 74 144 #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ 75 - #define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ 145 + #define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] 
Use PG_private_2 to mark 146 + * write to cache on read */ 76 147 }; 77 148 78 149 /* ··· 96 165 unsigned int dirty_len; /* Write-streaming dirty data length */ 97 166 }; 98 167 #define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ 168 + #define NETFS_FOLIO_COPY_TO_CACHE ((struct netfs_group *)0x356UL) /* Write to the cache only */ 169 + 170 + static inline bool netfs_is_folio_info(const void *priv) 171 + { 172 + return (unsigned long)priv & NETFS_FOLIO_INFO; 173 + } 174 + 175 + static inline struct netfs_folio *__netfs_folio_info(const void *priv) 176 + { 177 + if (netfs_is_folio_info(priv)) 178 + return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); 179 + return NULL; 180 + } 99 181 100 182 static inline struct netfs_folio *netfs_folio_info(struct folio *folio) 101 183 { 102 - void *priv = folio_get_private(folio); 103 - 104 - if ((unsigned long)priv & NETFS_FOLIO_INFO) 105 - return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); 106 - return NULL; 184 + return __netfs_folio_info(folio_get_private(folio)); 107 185 } 108 186 109 187 static inline struct netfs_group *netfs_folio_group(struct folio *folio) ··· 125 185 return finfo->netfs_group; 126 186 return priv; 127 187 } 188 + 189 + /* 190 + * Stream of I/O subrequests going to a particular destination, such as the 191 + * server or the local cache. This is mainly intended for writing where we may 192 + * have to write to multiple destinations concurrently. 
193 + */ 194 + struct netfs_io_stream { 195 + /* Submission tracking */ 196 + struct netfs_io_subrequest *construct; /* Op being constructed */ 197 + unsigned int submit_off; /* Folio offset we're submitting from */ 198 + unsigned int submit_len; /* Amount of data left to submit */ 199 + unsigned int submit_max_len; /* Amount I/O can be rounded up to */ 200 + void (*prepare_write)(struct netfs_io_subrequest *subreq); 201 + void (*issue_write)(struct netfs_io_subrequest *subreq); 202 + /* Collection tracking */ 203 + struct list_head subrequests; /* Contributory I/O operations */ 204 + struct netfs_io_subrequest *front; /* Op being collected */ 205 + unsigned long long collected_to; /* Position we've collected results to */ 206 + size_t transferred; /* The amount transferred from this stream */ 207 + enum netfs_io_source source; /* Where to read from/write to */ 208 + unsigned short error; /* Aggregate error for the stream */ 209 + unsigned char stream_nr; /* Index of stream in parent table */ 210 + bool avail; /* T if stream is available */ 211 + bool active; /* T if stream is active */ 212 + bool need_retry; /* T if this stream needs retrying */ 213 + bool failed; /* T if this stream failed */ 214 + }; 128 215 129 216 /* 130 217 * Resources required to do operations on a cache. 
··· 176 209 struct work_struct work; 177 210 struct list_head rreq_link; /* Link in rreq->subrequests */ 178 211 struct iov_iter io_iter; /* Iterator for this subrequest */ 179 - loff_t start; /* Where to start the I/O */ 212 + unsigned long long start; /* Where to start the I/O */ 213 + size_t max_len; /* Maximum size of the I/O */ 180 214 size_t len; /* Size of the I/O */ 181 215 size_t transferred; /* Amount of data transferred */ 182 216 refcount_t ref; 183 217 short error; /* 0 or error that occurred */ 184 218 unsigned short debug_index; /* Index in list (for debugging output) */ 219 + unsigned int nr_segs; /* Number of segs in io_iter */ 185 220 unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ 186 221 enum netfs_io_source source; /* Where to read from/write to */ 222 + unsigned char stream_nr; /* I/O stream this belongs to */ 187 223 unsigned long flags; 188 224 #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ 189 225 #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ ··· 194 224 #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ 195 225 #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ 196 226 #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ 227 + #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. 
ceph object) */ 228 + #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ 229 + #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ 230 + #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ 231 + #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ 197 232 }; 198 233 199 234 enum netfs_io_origin { 200 235 NETFS_READAHEAD, /* This read was triggered by readahead */ 201 236 NETFS_READPAGE, /* This read is a synchronous read */ 202 237 NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ 238 + NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ 203 239 NETFS_WRITEBACK, /* This write was triggered by writepages */ 204 240 NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ 205 - NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */ 206 241 NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ 207 242 NETFS_DIO_READ, /* This is a direct I/O read */ 208 243 NETFS_DIO_WRITE, /* This is a direct I/O write */ ··· 229 254 struct netfs_cache_resources cache_resources; 230 255 struct list_head proc_link; /* Link in netfs_iorequests */ 231 256 struct list_head subrequests; /* Contributory I/O operations */ 257 + struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ 258 + #define NR_IO_STREAMS 2 //wreq->nr_io_streams 259 + struct netfs_group *group; /* Writeback group being written back */ 232 260 struct iov_iter iter; /* Unencrypted-side iterator */ 233 261 struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ 234 262 void *netfs_priv; /* Private data for the netfs */ 263 + void *netfs_priv2; /* Private data for the netfs */ 235 264 struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ 236 265 unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ 237 266 unsigned int debug_id; 238 267 unsigned int rsize; /* Maximum read size (0 for none) */ 239 268 unsigned int wsize; /* 
Maximum write size (0 for none) */ 240 - unsigned int subreq_counter; /* Next subreq->debug_index */ 269 + atomic_t subreq_counter; /* Next subreq->debug_index */ 270 + unsigned int nr_group_rel; /* Number of refs to release on ->group */ 271 + spinlock_t lock; /* Lock for queuing subreqs */ 241 272 atomic_t nr_outstanding; /* Number of ops in progress */ 242 273 atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ 243 - size_t submitted; /* Amount submitted for I/O so far */ 244 - size_t len; /* Length of the request */ 245 274 size_t upper_len; /* Length can be extended to here */ 275 + unsigned long long submitted; /* Amount submitted for I/O so far */ 276 + unsigned long long len; /* Length of the request */ 246 277 size_t transferred; /* Amount to be indicated as transferred */ 247 278 short error; /* 0 or error that occurred */ 248 279 enum netfs_io_origin origin; /* Origin of the request */ 249 280 bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ 250 - loff_t i_size; /* Size of the file */ 251 - loff_t start; /* Start position */ 281 + unsigned long long i_size; /* Size of the file */ 282 + unsigned long long start; /* Start position */ 283 + atomic64_t issued_to; /* Write issuer folio cursor */ 284 + unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ 285 + unsigned long long collected_to; /* Point we've collected to */ 286 + unsigned long long cleaned_to; /* Position we've cleaned folios to */ 252 287 pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ 253 288 refcount_t ref; 254 289 unsigned long flags; ··· 272 287 #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ 273 288 #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ 274 289 #define NETFS_RREQ_BLOCKED 10 /* We blocked */ 290 + #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ 291 + #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ 292 + #define 
NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ 293 + #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark 294 + * write to cache on read */ 275 295 const struct netfs_request_ops *netfs_ops; 276 296 void (*cleanup)(struct netfs_io_request *req); 277 297 }; ··· 285 295 * Operations the network filesystem can/must provide to the helpers. 286 296 */ 287 297 struct netfs_request_ops { 288 - unsigned int io_request_size; /* Alloc size for netfs_io_request struct */ 289 - unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */ 298 + mempool_t *request_pool; 299 + mempool_t *subrequest_pool; 290 300 int (*init_request)(struct netfs_io_request *rreq, struct file *file); 291 301 void (*free_request)(struct netfs_io_request *rreq); 292 302 void (*free_subrequest)(struct netfs_io_subrequest *rreq); ··· 302 312 303 313 /* Modification handling */ 304 314 void (*update_i_size)(struct inode *inode, loff_t i_size); 315 + void (*post_modify)(struct inode *inode); 305 316 306 317 /* Write request handling */ 307 - void (*create_write_requests)(struct netfs_io_request *wreq, 308 - loff_t start, size_t len); 318 + void (*begin_writeback)(struct netfs_io_request *wreq); 319 + void (*prepare_write)(struct netfs_io_subrequest *subreq); 320 + void (*issue_write)(struct netfs_io_subrequest *subreq); 321 + void (*retry_request)(struct netfs_io_request *wreq, struct netfs_io_stream *stream); 309 322 void (*invalidate_cache)(struct netfs_io_request *wreq); 310 323 }; 311 324 ··· 343 350 netfs_io_terminated_t term_func, 344 351 void *term_func_priv); 345 352 353 + /* Write data to the cache from a netfs subrequest. 
*/ 354 + void (*issue_write)(struct netfs_io_subrequest *subreq); 355 + 346 356 /* Expand readahead request */ 347 357 void (*expand_readahead)(struct netfs_cache_resources *cres, 348 - loff_t *_start, size_t *_len, loff_t i_size); 358 + unsigned long long *_start, 359 + unsigned long long *_len, 360 + unsigned long long i_size); 349 361 350 362 /* Prepare a read operation, shortening it to a cached/uncached 351 363 * boundary as appropriate. 352 364 */ 353 365 enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq, 354 - loff_t i_size); 366 + unsigned long long i_size); 367 + 368 + /* Prepare a write subrequest, working out if we're allowed to do it 369 + * and finding out the maximum amount of data to gather before 370 + * attempting to submit. If we're not permitted to do it, the 371 + * subrequest should be marked failed. 372 + */ 373 + void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq); 355 374 356 375 /* Prepare a write operation, working out what part of the write we can 357 376 * actually do. ··· 415 410 void netfs_clear_inode_writeback(struct inode *inode, const void *aux); 416 411 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); 417 412 bool netfs_release_folio(struct folio *folio, gfp_t gfp); 418 - int netfs_launder_folio(struct folio *folio); 419 413 420 414 /* VMA operations API. 
*/ 421 415 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); ··· 430 426 iov_iter_extraction_t extraction_flags); 431 427 size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, 432 428 size_t max_size, size_t max_segs); 433 - struct netfs_io_subrequest *netfs_create_write_request( 434 - struct netfs_io_request *wreq, enum netfs_io_source dest, 435 - loff_t start, size_t len, work_func_t worker); 429 + void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); 436 430 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 437 431 bool was_async); 438 432 void netfs_queue_write_request(struct netfs_io_subrequest *subreq); ··· 474 472 #if IS_ENABLED(CONFIG_FSCACHE) 475 473 ctx->cache = NULL; 476 474 #endif 475 + mutex_init(&ctx->wb_lock); 477 476 /* ->releasepage() drives zero_point */ 478 477 if (use_zero_point) { 479 478 ctx->zero_point = ctx->remote_i_size;
+2
include/linux/pagemap.h
··· 40 40 int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); 41 41 int filemap_fdatawait_range_keep_errors(struct address_space *mapping, 42 42 loff_t start_byte, loff_t end_byte); 43 + int filemap_invalidate_inode(struct inode *inode, bool flush, 44 + loff_t start, loff_t end); 43 45 44 46 static inline int filemap_fdatawait(struct address_space *mapping) 45 47 {
+2
include/net/9p/client.h
··· 207 207 int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, 208 208 int *err); 209 209 int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err); 210 + struct netfs_io_subrequest; 211 + void p9_client_write_subreq(struct netfs_io_subrequest *subreq); 210 212 int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); 211 213 int p9dirent_read(struct p9_client *clnt, char *buf, int len, 212 214 struct p9_dirent *dirent);
+237 -13
include/trace/events/netfs.h
··· 24 24 E_(netfs_read_trace_write_begin, "WRITEBEGN") 25 25 26 26 #define netfs_write_traces \ 27 + EM(netfs_write_trace_copy_to_cache, "COPY2CACH") \ 27 28 EM(netfs_write_trace_dio_write, "DIO-WRITE") \ 28 - EM(netfs_write_trace_launder, "LAUNDER ") \ 29 29 EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ 30 30 EM(netfs_write_trace_writeback, "WRITEBACK") \ 31 31 E_(netfs_write_trace_writethrough, "WRITETHRU") ··· 34 34 EM(NETFS_READAHEAD, "RA") \ 35 35 EM(NETFS_READPAGE, "RP") \ 36 36 EM(NETFS_READ_FOR_WRITE, "RW") \ 37 + EM(NETFS_COPY_TO_CACHE, "CC") \ 37 38 EM(NETFS_WRITEBACK, "WB") \ 38 39 EM(NETFS_WRITETHROUGH, "WT") \ 39 - EM(NETFS_LAUNDER_WRITE, "LW") \ 40 40 EM(NETFS_UNBUFFERED_WRITE, "UW") \ 41 41 EM(NETFS_DIO_READ, "DR") \ 42 42 E_(NETFS_DIO_WRITE, "DW") ··· 44 44 #define netfs_rreq_traces \ 45 45 EM(netfs_rreq_trace_assess, "ASSESS ") \ 46 46 EM(netfs_rreq_trace_copy, "COPY ") \ 47 + EM(netfs_rreq_trace_collect, "COLLECT") \ 47 48 EM(netfs_rreq_trace_done, "DONE ") \ 48 49 EM(netfs_rreq_trace_free, "FREE ") \ 49 50 EM(netfs_rreq_trace_redirty, "REDIRTY") \ 50 51 EM(netfs_rreq_trace_resubmit, "RESUBMT") \ 52 + EM(netfs_rreq_trace_set_pause, "PAUSE ") \ 51 53 EM(netfs_rreq_trace_unlock, "UNLOCK ") \ 52 54 EM(netfs_rreq_trace_unmark, "UNMARK ") \ 53 55 EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ 56 + EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ 54 57 EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ 58 + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ 55 59 E_(netfs_rreq_trace_write_done, "WR-DONE") 56 60 57 61 #define netfs_sreq_sources \ ··· 68 64 E_(NETFS_INVALID_WRITE, "INVL") 69 65 70 66 #define netfs_sreq_traces \ 67 + EM(netfs_sreq_trace_discard, "DSCRD") \ 71 68 EM(netfs_sreq_trace_download_instead, "RDOWN") \ 69 + EM(netfs_sreq_trace_fail, "FAIL ") \ 72 70 EM(netfs_sreq_trace_free, "FREE ") \ 73 71 EM(netfs_sreq_trace_limited, "LIMIT") \ 74 72 EM(netfs_sreq_trace_prepare, "PREP ") \ 73 + EM(netfs_sreq_trace_prep_failed, "PRPFL") \ 75 74 
EM(netfs_sreq_trace_resubmit_short, "SHORT") \ 75 + EM(netfs_sreq_trace_retry, "RETRY") \ 76 76 EM(netfs_sreq_trace_submit, "SUBMT") \ 77 77 EM(netfs_sreq_trace_terminated, "TERM ") \ 78 78 EM(netfs_sreq_trace_write, "WRITE") \ ··· 96 88 #define netfs_rreq_ref_traces \ 97 89 EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ 98 90 EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ 91 + EM(netfs_rreq_trace_get_work, "GET WORK ") \ 99 92 EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ 100 93 EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ 101 94 EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ ··· 104 95 EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ 105 96 EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ 106 97 EM(netfs_rreq_trace_put_work, "PUT WORK ") \ 98 + EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \ 99 + EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \ 107 100 EM(netfs_rreq_trace_see_work, "SEE WORK ") \ 108 101 E_(netfs_rreq_trace_new, "NEW ") 109 102 110 103 #define netfs_sreq_ref_traces \ 111 104 EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ 112 105 EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ 106 + EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ 113 107 EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ 114 108 EM(netfs_sreq_trace_new, "NEW ") \ 109 + EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ 115 110 EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ 116 111 EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ 112 + EM(netfs_sreq_trace_put_done, "PUT DONE ") \ 117 113 EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ 118 114 EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ 119 115 EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \ 116 + EM(netfs_sreq_trace_put_oom, "PUT OOM ") \ 120 117 EM(netfs_sreq_trace_put_wip, "PUT WIP ") \ 121 118 EM(netfs_sreq_trace_put_work, "PUT WORK ") \ 122 119 E_(netfs_sreq_trace_put_terminated, "PUT TERM ") ··· 139 124 EM(netfs_streaming_filled_page, "mod-streamw-f") \ 140 125 
EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ 141 126 /* The rest are for writeback */ \ 127 + EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ 142 128 EM(netfs_folio_trace_clear, "clear") \ 143 - EM(netfs_folio_trace_clear_s, "clear-s") \ 129 + EM(netfs_folio_trace_clear_cc, "clear-cc") \ 144 130 EM(netfs_folio_trace_clear_g, "clear-g") \ 145 - EM(netfs_folio_trace_copy_to_cache, "copy") \ 146 - EM(netfs_folio_trace_end_copy, "end-copy") \ 131 + EM(netfs_folio_trace_clear_s, "clear-s") \ 132 + EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ 147 133 EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ 148 134 EM(netfs_folio_trace_kill, "kill") \ 149 - EM(netfs_folio_trace_launder, "launder") \ 135 + EM(netfs_folio_trace_kill_cc, "kill-cc") \ 136 + EM(netfs_folio_trace_kill_g, "kill-g") \ 137 + EM(netfs_folio_trace_kill_s, "kill-s") \ 150 138 EM(netfs_folio_trace_mkwrite, "mkwrite") \ 151 139 EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ 140 + EM(netfs_folio_trace_not_under_wback, "!wback") \ 152 141 EM(netfs_folio_trace_read_gaps, "read-gaps") \ 153 - EM(netfs_folio_trace_redirty, "redirty") \ 154 142 EM(netfs_folio_trace_redirtied, "redirtied") \ 155 143 EM(netfs_folio_trace_store, "store") \ 144 + EM(netfs_folio_trace_store_copy, "store-copy") \ 156 145 EM(netfs_folio_trace_store_plus, "store+") \ 157 146 EM(netfs_folio_trace_wthru, "wthru") \ 158 147 E_(netfs_folio_trace_wthru_plus, "wthru+") 148 + 149 + #define netfs_collect_contig_traces \ 150 + EM(netfs_contig_trace_collect, "Collect") \ 151 + EM(netfs_contig_trace_jump, "-->JUMP-->") \ 152 + E_(netfs_contig_trace_unlock, "Unlock") 159 153 160 154 #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY 161 155 #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY ··· 182 158 enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); 183 159 enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); 184 160 enum netfs_folio_trace { netfs_folio_traces } __mode(byte); 161 + enum 
netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); 185 162 186 163 #endif 187 164 ··· 204 179 netfs_rreq_ref_traces; 205 180 netfs_sreq_ref_traces; 206 181 netfs_folio_traces; 182 + netfs_collect_contig_traces; 207 183 208 184 /* 209 185 * Now redefine the EM() and E_() macros to map the enums to the strings that ··· 305 279 __entry->start = sreq->start; 306 280 ), 307 281 308 - TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d", 282 + TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx e=%d", 309 283 __entry->rreq, __entry->index, 310 284 __print_symbolic(__entry->source, netfs_sreq_sources), 311 285 __print_symbolic(__entry->what, netfs_sreq_traces), ··· 345 319 __entry->start = sreq ? sreq->start : 0; 346 320 ), 347 321 348 - TP_printk("R=%08x[%d] %s f=%02x s=%llx %zx/%zx %s e=%d", 322 + TP_printk("R=%08x[%x] %s f=%02x s=%llx %zx/%zx %s e=%d", 349 323 __entry->rreq, __entry->index, 350 324 __print_symbolic(__entry->source, netfs_sreq_sources), 351 325 __entry->flags, ··· 438 412 __field(unsigned long long, start ) 439 413 __field(size_t, len ) 440 414 __field(unsigned int, flags ) 415 + __field(unsigned int, ino ) 441 416 ), 442 417 443 418 TP_fast_assign( 444 419 __entry->start = iocb->ki_pos; 445 420 __entry->len = iov_iter_count(from); 421 + __entry->ino = iocb->ki_filp->f_inode->i_ino; 446 422 __entry->flags = iocb->ki_flags; 447 423 ), 448 424 449 - TP_printk("WRITE-ITER s=%llx l=%zx f=%x", 450 - __entry->start, __entry->len, __entry->flags) 425 + TP_printk("WRITE-ITER i=%x s=%llx l=%zx f=%x", 426 + __entry->ino, __entry->start, __entry->len, __entry->flags) 451 427 ); 452 428 453 429 TRACE_EVENT(netfs_write, ··· 461 433 TP_STRUCT__entry( 462 434 __field(unsigned int, wreq ) 463 435 __field(unsigned int, cookie ) 436 + __field(unsigned int, ino ) 464 437 __field(enum netfs_write_trace, what ) 465 438 __field(unsigned long long, start ) 466 - __field(size_t, len ) 439 + __field(unsigned long long, len ) 467 440 ), 468 441 469 442 
TP_fast_assign( ··· 472 443 struct fscache_cookie *__cookie = netfs_i_cookie(__ctx); 473 444 __entry->wreq = wreq->debug_id; 474 445 __entry->cookie = __cookie ? __cookie->debug_id : 0; 446 + __entry->ino = wreq->inode->i_ino; 475 447 __entry->what = what; 476 448 __entry->start = wreq->start; 477 449 __entry->len = wreq->len; 478 450 ), 479 451 480 - TP_printk("R=%08x %s c=%08x by=%llx-%llx", 452 + TP_printk("R=%08x %s c=%08x i=%x by=%llx-%llx", 481 453 __entry->wreq, 482 454 __print_symbolic(__entry->what, netfs_write_traces), 483 455 __entry->cookie, 456 + __entry->ino, 484 457 __entry->start, __entry->start + __entry->len - 1) 458 + ); 459 + 460 + TRACE_EVENT(netfs_collect, 461 + TP_PROTO(const struct netfs_io_request *wreq), 462 + 463 + TP_ARGS(wreq), 464 + 465 + TP_STRUCT__entry( 466 + __field(unsigned int, wreq ) 467 + __field(unsigned int, len ) 468 + __field(unsigned long long, transferred ) 469 + __field(unsigned long long, start ) 470 + ), 471 + 472 + TP_fast_assign( 473 + __entry->wreq = wreq->debug_id; 474 + __entry->start = wreq->start; 475 + __entry->len = wreq->len; 476 + __entry->transferred = wreq->transferred; 477 + ), 478 + 479 + TP_printk("R=%08x s=%llx-%llx", 480 + __entry->wreq, 481 + __entry->start + __entry->transferred, 482 + __entry->start + __entry->len) 483 + ); 484 + 485 + TRACE_EVENT(netfs_collect_contig, 486 + TP_PROTO(const struct netfs_io_request *wreq, unsigned long long to, 487 + enum netfs_collect_contig_trace type), 488 + 489 + TP_ARGS(wreq, to, type), 490 + 491 + TP_STRUCT__entry( 492 + __field(unsigned int, wreq) 493 + __field(enum netfs_collect_contig_trace, type) 494 + __field(unsigned long long, contiguity) 495 + __field(unsigned long long, to) 496 + ), 497 + 498 + TP_fast_assign( 499 + __entry->wreq = wreq->debug_id; 500 + __entry->type = type; 501 + __entry->contiguity = wreq->contiguity; 502 + __entry->to = to; 503 + ), 504 + 505 + TP_printk("R=%08x %llx -> %llx %s", 506 + __entry->wreq, 507 + __entry->contiguity, 508 + 
__entry->to, 509 + __print_symbolic(__entry->type, netfs_collect_contig_traces)) 510 + ); 511 + 512 + TRACE_EVENT(netfs_collect_sreq, 513 + TP_PROTO(const struct netfs_io_request *wreq, 514 + const struct netfs_io_subrequest *subreq), 515 + 516 + TP_ARGS(wreq, subreq), 517 + 518 + TP_STRUCT__entry( 519 + __field(unsigned int, wreq ) 520 + __field(unsigned int, subreq ) 521 + __field(unsigned int, stream ) 522 + __field(unsigned int, len ) 523 + __field(unsigned int, transferred ) 524 + __field(unsigned long long, start ) 525 + ), 526 + 527 + TP_fast_assign( 528 + __entry->wreq = wreq->debug_id; 529 + __entry->subreq = subreq->debug_index; 530 + __entry->stream = subreq->stream_nr; 531 + __entry->start = subreq->start; 532 + __entry->len = subreq->len; 533 + __entry->transferred = subreq->transferred; 534 + ), 535 + 536 + TP_printk("R=%08x[%u:%02x] s=%llx t=%x/%x", 537 + __entry->wreq, __entry->stream, __entry->subreq, 538 + __entry->start, __entry->transferred, __entry->len) 539 + ); 540 + 541 + TRACE_EVENT(netfs_collect_folio, 542 + TP_PROTO(const struct netfs_io_request *wreq, 543 + const struct folio *folio, 544 + unsigned long long fend, 545 + unsigned long long collected_to), 546 + 547 + TP_ARGS(wreq, folio, fend, collected_to), 548 + 549 + TP_STRUCT__entry( 550 + __field(unsigned int, wreq ) 551 + __field(unsigned long, index ) 552 + __field(unsigned long long, fend ) 553 + __field(unsigned long long, cleaned_to ) 554 + __field(unsigned long long, collected_to ) 555 + ), 556 + 557 + TP_fast_assign( 558 + __entry->wreq = wreq->debug_id; 559 + __entry->index = folio->index; 560 + __entry->fend = fend; 561 + __entry->cleaned_to = wreq->cleaned_to; 562 + __entry->collected_to = collected_to; 563 + ), 564 + 565 + TP_printk("R=%08x ix=%05lx r=%llx-%llx t=%llx/%llx", 566 + __entry->wreq, __entry->index, 567 + (unsigned long long)__entry->index * PAGE_SIZE, __entry->fend, 568 + __entry->cleaned_to, __entry->collected_to) 569 + ); 570 + 571 + 
TRACE_EVENT(netfs_collect_state, 572 + TP_PROTO(const struct netfs_io_request *wreq, 573 + unsigned long long collected_to, 574 + unsigned int notes), 575 + 576 + TP_ARGS(wreq, collected_to, notes), 577 + 578 + TP_STRUCT__entry( 579 + __field(unsigned int, wreq ) 580 + __field(unsigned int, notes ) 581 + __field(unsigned long long, collected_to ) 582 + __field(unsigned long long, cleaned_to ) 583 + __field(unsigned long long, contiguity ) 584 + ), 585 + 586 + TP_fast_assign( 587 + __entry->wreq = wreq->debug_id; 588 + __entry->notes = notes; 589 + __entry->collected_to = collected_to; 590 + __entry->cleaned_to = wreq->cleaned_to; 591 + __entry->contiguity = wreq->contiguity; 592 + ), 593 + 594 + TP_printk("R=%08x cto=%llx fto=%llx ctg=%llx n=%x", 595 + __entry->wreq, __entry->collected_to, 596 + __entry->cleaned_to, __entry->contiguity, 597 + __entry->notes) 598 + ); 599 + 600 + TRACE_EVENT(netfs_collect_gap, 601 + TP_PROTO(const struct netfs_io_request *wreq, 602 + const struct netfs_io_stream *stream, 603 + unsigned long long jump_to, char type), 604 + 605 + TP_ARGS(wreq, stream, jump_to, type), 606 + 607 + TP_STRUCT__entry( 608 + __field(unsigned int, wreq) 609 + __field(unsigned char, stream) 610 + __field(unsigned char, type) 611 + __field(unsigned long long, from) 612 + __field(unsigned long long, to) 613 + ), 614 + 615 + TP_fast_assign( 616 + __entry->wreq = wreq->debug_id; 617 + __entry->stream = stream->stream_nr; 618 + __entry->from = stream->collected_to; 619 + __entry->to = jump_to; 620 + __entry->type = type; 621 + ), 622 + 623 + TP_printk("R=%08x[%x:] %llx->%llx %c", 624 + __entry->wreq, __entry->stream, 625 + __entry->from, __entry->to, __entry->type) 626 + ); 627 + 628 + TRACE_EVENT(netfs_collect_stream, 629 + TP_PROTO(const struct netfs_io_request *wreq, 630 + const struct netfs_io_stream *stream), 631 + 632 + TP_ARGS(wreq, stream), 633 + 634 + TP_STRUCT__entry( 635 + __field(unsigned int, wreq) 636 + __field(unsigned char, stream) 637 + 
__field(unsigned long long, collected_to) 638 + __field(unsigned long long, front) 639 + ), 640 + 641 + TP_fast_assign( 642 + __entry->wreq = wreq->debug_id; 643 + __entry->stream = stream->stream_nr; 644 + __entry->collected_to = stream->collected_to; 645 + __entry->front = stream->front ? stream->front->start : UINT_MAX; 646 + ), 647 + 648 + TP_printk("R=%08x[%x:] cto=%llx frn=%llx", 649 + __entry->wreq, __entry->stream, 650 + __entry->collected_to, __entry->front) 485 651 ); 486 652 487 653 #undef EM
+57 -3
mm/filemap.c
··· 1540 1540 * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio. 1541 1541 * @folio: The folio to wait on. 1542 1542 * 1543 - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio. 1543 + * Wait for PG_private_2 to be cleared on a folio. 1544 1544 */ 1545 1545 void folio_wait_private_2(struct folio *folio) 1546 1546 { ··· 1553 1553 * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio. 1554 1554 * @folio: The folio to wait on. 1555 1555 * 1556 - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a 1557 - * fatal signal is received by the calling task. 1556 + * Wait for PG_private_2 to be cleared on a folio or until a fatal signal is 1557 + * received by the calling task. 1558 1558 * 1559 1559 * Return: 1560 1560 * - 0 if successful. ··· 4133 4133 return try_to_free_buffers(folio); 4134 4134 } 4135 4135 EXPORT_SYMBOL(filemap_release_folio); 4136 + 4137 + /** 4138 + * filemap_invalidate_inode - Invalidate/forcibly write back a range of an inode's pagecache 4139 + * @inode: The inode to flush 4140 + * @flush: Set to write back rather than simply invalidate. 4141 + * @start: First byte to in range. 4142 + * @end: Last byte in range (inclusive), or LLONG_MAX for everything from start 4143 + * onwards. 4144 + * 4145 + * Invalidate all the folios on an inode that contribute to the specified 4146 + * range, possibly writing them back first. Whilst the operation is 4147 + * undertaken, the invalidate lock is held to prevent new folios from being 4148 + * installed. 4149 + */ 4150 + int filemap_invalidate_inode(struct inode *inode, bool flush, 4151 + loff_t start, loff_t end) 4152 + { 4153 + struct address_space *mapping = inode->i_mapping; 4154 + pgoff_t first = start >> PAGE_SHIFT; 4155 + pgoff_t last = end >> PAGE_SHIFT; 4156 + pgoff_t nr = end == LLONG_MAX ? 
ULONG_MAX : last - first + 1; 4157 + 4158 + if (!mapping || !mapping->nrpages || end < start) 4159 + goto out; 4160 + 4161 + /* Prevent new folios from being added to the inode. */ 4162 + filemap_invalidate_lock(mapping); 4163 + 4164 + if (!mapping->nrpages) 4165 + goto unlock; 4166 + 4167 + unmap_mapping_pages(mapping, first, nr, false); 4168 + 4169 + /* Write back the data if we're asked to. */ 4170 + if (flush) { 4171 + struct writeback_control wbc = { 4172 + .sync_mode = WB_SYNC_ALL, 4173 + .nr_to_write = LONG_MAX, 4174 + .range_start = start, 4175 + .range_end = end, 4176 + }; 4177 + 4178 + filemap_fdatawrite_wbc(mapping, &wbc); 4179 + } 4180 + 4181 + /* Wait for writeback to complete on all folios and discard. */ 4182 + truncate_inode_pages_range(mapping, start, end); 4183 + 4184 + unlock: 4185 + filemap_invalidate_unlock(mapping); 4186 + out: 4187 + return filemap_check_errors(mapping); 4188 + } 4189 + EXPORT_SYMBOL_GPL(filemap_invalidate_inode); 4136 4190 4137 4191 #ifdef CONFIG_CACHESTAT_SYSCALL 4138 4192 /**
+1
mm/page-writeback.c
··· 2546 2546 folio_batch_release(&wbc->fbatch); 2547 2547 return NULL; 2548 2548 } 2549 + EXPORT_SYMBOL_GPL(writeback_iter); 2549 2550 2550 2551 /** 2551 2552 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
+1
net/9p/Kconfig
··· 5 5 6 6 menuconfig NET_9P 7 7 tristate "Plan 9 Resource Sharing Support (9P2000)" 8 + select NETFS_SUPPORT 8 9 help 9 10 If you say Y here, you will get experimental support for 10 11 Plan 9 resource sharing via the 9P2000 protocol.
+49
net/9p/client.c
··· 18 18 #include <linux/sched/signal.h> 19 19 #include <linux/uaccess.h> 20 20 #include <linux/uio.h> 21 + #include <linux/netfs.h> 21 22 #include <net/9p/9p.h> 22 23 #include <linux/parser.h> 23 24 #include <linux/seq_file.h> ··· 1661 1660 return total; 1662 1661 } 1663 1662 EXPORT_SYMBOL(p9_client_write); 1663 + 1664 + void 1665 + p9_client_write_subreq(struct netfs_io_subrequest *subreq) 1666 + { 1667 + struct netfs_io_request *wreq = subreq->rreq; 1668 + struct p9_fid *fid = wreq->netfs_priv; 1669 + struct p9_client *clnt = fid->clnt; 1670 + struct p9_req_t *req; 1671 + unsigned long long start = subreq->start + subreq->transferred; 1672 + int written, len = subreq->len - subreq->transferred; 1673 + int err; 1674 + 1675 + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n", 1676 + fid->fid, start, len); 1677 + 1678 + /* Don't bother zerocopy for small IO (< 1024) */ 1679 + if (clnt->trans_mod->zc_request && len > 1024) { 1680 + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter, 1681 + 0, wreq->len, P9_ZC_HDR_SZ, "dqd", 1682 + fid->fid, start, len); 1683 + } else { 1684 + req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, 1685 + start, len, &subreq->io_iter); 1686 + } 1687 + if (IS_ERR(req)) { 1688 + netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); 1689 + return; 1690 + } 1691 + 1692 + err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); 1693 + if (err) { 1694 + trace_9p_protocol_dump(clnt, &req->rc); 1695 + p9_req_put(clnt, req); 1696 + netfs_write_subrequest_terminated(subreq, err, false); 1697 + return; 1698 + } 1699 + 1700 + if (written > len) { 1701 + pr_err("bogus RWRITE count (%d > %u)\n", written, len); 1702 + written = len; 1703 + } 1704 + 1705 + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); 1706 + 1707 + p9_req_put(clnt, req); 1708 + netfs_write_subrequest_terminated(subreq, written, false); 1709 + } 1710 + EXPORT_SYMBOL(p9_client_write_subreq); 1664 1711 1665 1712 struct p9_wstat 
*p9_client_stat(struct p9_fid *fid) 1666 1713 {