Merge tag 'vfs-6.10.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

+38 -28

fs/9p/vfs_addr.c

··· 26 26 #include "cache.h" 27 27 #include "fid.h" 28 28 29 - static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq) 29 + /* 30 + * Writeback calls this when it finds a folio that needs uploading. This isn't 31 + * called if writeback only has copy-to-cache to deal with. 32 + */ 33 + static void v9fs_begin_writeback(struct netfs_io_request *wreq) 34 + { 35 + struct p9_fid *fid; 36 + 37 + fid = v9fs_fid_find_inode(wreq->inode, true, INVALID_UID, true); 38 + if (!fid) { 39 + WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n", 40 + wreq->inode->i_ino); 41 + return; 42 + } 43 + 44 + wreq->wsize = fid->clnt->msize - P9_IOHDRSZ; 45 + if (fid->iounit) 46 + wreq->wsize = min(wreq->wsize, fid->iounit); 47 + wreq->netfs_priv = fid; 48 + wreq->io_streams[0].avail = true; 49 + } 50 + 51 + /* 52 + * Issue a subrequest to write to the server. 53 + */ 54 + static void v9fs_issue_write(struct netfs_io_subrequest *subreq) 30 55 { 31 56 struct p9_fid *fid = subreq->rreq->netfs_priv; 32 57 int err, len; 33 58 34 - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 35 59 len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); 36 60 netfs_write_subrequest_terminated(subreq, len ?: err, false); 37 - } 38 - 39 - static void v9fs_upload_to_server_worker(struct work_struct *work) 40 - { 41 - struct netfs_io_subrequest *subreq = 42 - container_of(work, struct netfs_io_subrequest, work); 43 - 44 - v9fs_upload_to_server(subreq); 45 - } 46 - 47 - /* 48 - * Set up write requests for a writeback slice. We need to add a write request 49 - * for each write we want to make. 
50 - */ 51 - static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) 52 - { 53 - struct netfs_io_subrequest *subreq; 54 - 55 - subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, 56 - start, len, v9fs_upload_to_server_worker); 57 - if (subreq) 58 - netfs_queue_write_request(subreq); 59 61 } 60 62 61 63 /** ··· 89 87 { 90 88 struct p9_fid *fid; 91 89 bool writing = (rreq->origin == NETFS_READ_FOR_WRITE || 92 - rreq->origin == NETFS_WRITEBACK || 93 90 rreq->origin == NETFS_WRITETHROUGH || 94 - rreq->origin == NETFS_LAUNDER_WRITE || 95 91 rreq->origin == NETFS_UNBUFFERED_WRITE || 96 92 rreq->origin == NETFS_DIO_WRITE); 93 + 94 + if (rreq->origin == NETFS_WRITEBACK) 95 + return 0; /* We don't get the write handle until we find we 96 + * have actually dirty data and not just 97 + * copy-to-cache data. 98 + */ 97 99 98 100 if (file) { 99 101 fid = file->private_data; ··· 109 103 if (!fid) 110 104 goto no_fid; 111 105 } 106 + 107 + rreq->wsize = fid->clnt->msize - P9_IOHDRSZ; 108 + if (fid->iounit) 109 + rreq->wsize = min(rreq->wsize, fid->iounit); 112 110 113 111 /* we might need to read from a fid that was opened write-only 114 112 * for read-modify-write of page cache, use the writeback fid ··· 142 132 .init_request = v9fs_init_request, 143 133 .free_request = v9fs_free_request, 144 134 .issue_read = v9fs_issue_read, 145 - .create_write_requests = v9fs_create_write_requests, 135 + .begin_writeback = v9fs_begin_writeback, 136 + .issue_write = v9fs_issue_write, 146 137 }; 147 138 148 139 const struct address_space_operations v9fs_addr_operations = { ··· 152 141 .dirty_folio = netfs_dirty_folio, 153 142 .release_folio = netfs_release_folio, 154 143 .invalidate_folio = netfs_invalidate_folio, 155 - .launder_folio = netfs_launder_folio, 156 144 .direct_IO = noop_direct_IO, 157 145 .writepages = netfs_writepages, 158 146 };

+5 -3

fs/afs/file.c

··· 54 54 .read_folio = netfs_read_folio, 55 55 .readahead = netfs_readahead, 56 56 .dirty_folio = netfs_dirty_folio, 57 - .launder_folio = netfs_launder_folio, 58 57 .release_folio = netfs_release_folio, 59 58 .invalidate_folio = netfs_invalidate_folio, 60 59 .migrate_folio = filemap_migrate_folio, ··· 353 354 if (file) 354 355 rreq->netfs_priv = key_get(afs_file_key(file)); 355 356 rreq->rsize = 256 * 1024; 356 - rreq->wsize = 256 * 1024; 357 + rreq->wsize = 256 * 1024 * 1024; 357 358 return 0; 358 359 } 359 360 ··· 368 369 static void afs_free_request(struct netfs_io_request *rreq) 369 370 { 370 371 key_put(rreq->netfs_priv); 372 + afs_put_wb_key(rreq->netfs_priv2); 371 373 } 372 374 373 375 static void afs_update_i_size(struct inode *inode, loff_t new_i_size) ··· 400 400 .issue_read = afs_issue_read, 401 401 .update_i_size = afs_update_i_size, 402 402 .invalidate_cache = afs_netfs_invalidate_cache, 403 - .create_write_requests = afs_create_write_requests, 403 + .begin_writeback = afs_begin_writeback, 404 + .prepare_write = afs_prepare_write, 405 + .issue_write = afs_issue_write, 404 406 }; 405 407 406 408 static void afs_add_open_mmap(struct afs_vnode *vnode)

+4 -2

fs/afs/internal.h

··· 916 916 loff_t pos; 917 917 loff_t size; 918 918 loff_t i_size; 919 - bool laundering; /* Laundering page, PG_writeback not set */ 920 919 } store; 921 920 struct { 922 921 struct iattr *attr; ··· 1598 1599 /* 1599 1600 * write.c 1600 1601 */ 1602 + void afs_prepare_write(struct netfs_io_subrequest *subreq); 1603 + void afs_issue_write(struct netfs_io_subrequest *subreq); 1604 + void afs_begin_writeback(struct netfs_io_request *wreq); 1605 + void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream); 1601 1606 extern int afs_writepages(struct address_space *, struct writeback_control *); 1602 1607 extern int afs_fsync(struct file *, loff_t, loff_t, int); 1603 1608 extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); 1604 1609 extern void afs_prune_wb_keys(struct afs_vnode *); 1605 - void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len); 1606 1610 1607 1611 /* 1608 1612 * xattr.c

+2 -2

fs/afs/validation.c

··· 365 365 * written back in a regular file and completely discard the pages in a 366 366 * directory or symlink */ 367 367 if (S_ISREG(vnode->netfs.inode.i_mode)) 368 - invalidate_remote_inode(&vnode->netfs.inode); 368 + filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX); 369 369 else 370 - invalidate_inode_pages2(vnode->netfs.inode.i_mapping); 370 + filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX); 371 371 } 372 372 373 373 /*

+100 -93

fs/afs/write.c

··· 29 29 30 30 /* 31 31 * Find a key to use for the writeback. We cached the keys used to author the 32 - * writes on the vnode. *_wbk will contain the last writeback key used or NULL 33 - * and we need to start from there if it's set. 32 + * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key 33 + * record used or NULL and we need to start from there if it's set. 34 + * wreq->netfs_priv will be set to the key itself or NULL. 34 35 */ 35 - static int afs_get_writeback_key(struct afs_vnode *vnode, 36 - struct afs_wb_key **_wbk) 36 + static void afs_get_writeback_key(struct netfs_io_request *wreq) 37 37 { 38 - struct afs_wb_key *wbk = NULL; 39 - struct list_head *p; 40 - int ret = -ENOKEY, ret2; 38 + struct afs_wb_key *wbk, *old = wreq->netfs_priv2; 39 + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); 40 + 41 + key_put(wreq->netfs_priv); 42 + wreq->netfs_priv = NULL; 43 + wreq->netfs_priv2 = NULL; 41 44 42 45 spin_lock(&vnode->wb_lock); 43 - if (*_wbk) 44 - p = (*_wbk)->vnode_link.next; 46 + if (old) 47 + wbk = list_next_entry(old, vnode_link); 45 48 else 46 - p = vnode->wb_keys.next; 49 + wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link); 47 50 48 - while (p != &vnode->wb_keys) { 49 - wbk = list_entry(p, struct afs_wb_key, vnode_link); 51 + list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) { 50 52 _debug("wbk %u", key_serial(wbk->key)); 51 - ret2 = key_validate(wbk->key); 52 - if (ret2 == 0) { 53 + if (key_validate(wbk->key) == 0) { 53 54 refcount_inc(&wbk->usage); 55 + wreq->netfs_priv = key_get(wbk->key); 56 + wreq->netfs_priv2 = wbk; 54 57 _debug("USE WB KEY %u", key_serial(wbk->key)); 55 58 break; 56 59 } 57 - 58 - wbk = NULL; 59 - if (ret == -ENOKEY) 60 - ret = ret2; 61 - p = p->next; 62 60 } 63 61 64 62 spin_unlock(&vnode->wb_lock); 65 - if (*_wbk) 66 - afs_put_wb_key(*_wbk); 67 - *_wbk = wbk; 68 - return 0; 63 + 64 + afs_put_wb_key(old); 69 65 } 70 66 71 67 static void afs_store_data_success(struct 
afs_operation *op) ··· 71 75 op->ctime = op->file[0].scb.status.mtime_client; 72 76 afs_vnode_commit_status(op, &op->file[0]); 73 77 if (!afs_op_error(op)) { 74 - if (!op->store.laundering) 75 - afs_pages_written_back(vnode, op->store.pos, op->store.size); 78 + afs_pages_written_back(vnode, op->store.pos, op->store.size); 76 79 afs_stat_v(vnode, n_stores); 77 80 atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes); 78 81 } ··· 84 89 }; 85 90 86 91 /* 87 - * write to a file 92 + * Prepare a subrequest to write to the server. This sets the max_len 93 + * parameter. 88 94 */ 89 - static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos, 90 - bool laundering) 95 + void afs_prepare_write(struct netfs_io_subrequest *subreq) 91 96 { 97 + //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) 98 + // subreq->max_len = 512 * 1024; 99 + //else 100 + subreq->max_len = 256 * 1024 * 1024; 101 + } 102 + 103 + /* 104 + * Issue a subrequest to write to the server. 
105 + */ 106 + static void afs_issue_write_worker(struct work_struct *work) 107 + { 108 + struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work); 109 + struct netfs_io_request *wreq = subreq->rreq; 92 110 struct afs_operation *op; 93 - struct afs_wb_key *wbk = NULL; 94 - loff_t size = iov_iter_count(iter); 111 + struct afs_vnode *vnode = AFS_FS_I(wreq->inode); 112 + unsigned long long pos = subreq->start + subreq->transferred; 113 + size_t len = subreq->len - subreq->transferred; 95 114 int ret = -ENOKEY; 96 115 97 - _enter("%s{%llx:%llu.%u},%llx,%llx", 116 + _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx", 117 + wreq->debug_id, subreq->debug_index, 98 118 vnode->volume->name, 99 119 vnode->fid.vid, 100 120 vnode->fid.vnode, 101 121 vnode->fid.unique, 102 - size, pos); 122 + pos, len); 103 123 104 - ret = afs_get_writeback_key(vnode, &wbk); 105 - if (ret) { 106 - _leave(" = %d [no keys]", ret); 107 - return ret; 108 - } 124 + #if 0 // Error injection 125 + if (subreq->debug_index == 3) 126 + return netfs_write_subrequest_terminated(subreq, -ENOANO, false); 109 127 110 - op = afs_alloc_operation(wbk->key, vnode->volume); 111 - if (IS_ERR(op)) { 112 - afs_put_wb_key(wbk); 113 - return -ENOMEM; 128 + if (!test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) { 129 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 130 + return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); 114 131 } 132 + #endif 133 + 134 + op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); 135 + if (IS_ERR(op)) 136 + return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); 115 137 116 138 afs_op_set_vnode(op, 0, vnode); 117 - op->file[0].dv_delta = 1; 139 + op->file[0].dv_delta = 1; 118 140 op->file[0].modification = true; 119 - op->store.pos = pos; 120 - op->store.size = size; 121 - op->store.laundering = laundering; 122 - op->flags |= AFS_OPERATION_UNINTR; 123 - op->ops = &afs_store_data_operation; 141 + op->store.pos = pos; 142 + op->store.size = 
len; 143 + op->flags |= AFS_OPERATION_UNINTR; 144 + op->ops = &afs_store_data_operation; 124 145 125 - try_next_key: 126 146 afs_begin_vnode_operation(op); 127 147 128 - op->store.write_iter = iter; 129 - op->store.i_size = max(pos + size, vnode->netfs.remote_i_size); 130 - op->mtime = inode_get_mtime(&vnode->netfs.inode); 148 + op->store.write_iter = &subreq->io_iter; 149 + op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size); 150 + op->mtime = inode_get_mtime(&vnode->netfs.inode); 131 151 132 152 afs_wait_for_operation(op); 133 - 134 - switch (afs_op_error(op)) { 153 + ret = afs_put_operation(op); 154 + switch (ret) { 135 155 case -EACCES: 136 156 case -EPERM: 137 157 case -ENOKEY: 138 158 case -EKEYEXPIRED: 139 159 case -EKEYREJECTED: 140 160 case -EKEYREVOKED: 141 - _debug("next"); 142 - 143 - ret = afs_get_writeback_key(vnode, &wbk); 144 - if (ret == 0) { 145 - key_put(op->key); 146 - op->key = key_get(wbk->key); 147 - goto try_next_key; 148 - } 161 + /* If there are more keys we can try, use the retry algorithm 162 + * to rotate the keys. 163 + */ 164 + if (wreq->netfs_priv2) 165 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 149 166 break; 150 167 } 151 168 152 - afs_put_wb_key(wbk); 153 - _leave(" = %d", afs_op_error(op)); 154 - return afs_put_operation(op); 169 + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); 155 170 } 156 171 157 - static void afs_upload_to_server(struct netfs_io_subrequest *subreq) 172 + void afs_issue_write(struct netfs_io_subrequest *subreq) 158 173 { 159 - struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); 160 - ssize_t ret; 161 - 162 - _enter("%x[%x],%zx", 163 - subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count); 164 - 165 - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 166 - ret = afs_store_data(vnode, &subreq->io_iter, subreq->start, 167 - subreq->rreq->origin == NETFS_LAUNDER_WRITE); 168 - netfs_write_subrequest_terminated(subreq, ret < 0 ? 
ret : subreq->len, 169 - false); 170 - } 171 - 172 - static void afs_upload_to_server_worker(struct work_struct *work) 173 - { 174 - struct netfs_io_subrequest *subreq = 175 - container_of(work, struct netfs_io_subrequest, work); 176 - 177 - afs_upload_to_server(subreq); 174 + subreq->work.func = afs_issue_write_worker; 175 + if (!queue_work(system_unbound_wq, &subreq->work)) 176 + WARN_ON_ONCE(1); 178 177 } 179 178 180 179 /* 181 - * Set up write requests for a writeback slice. We need to add a write request 182 - * for each write we want to make. 180 + * Writeback calls this when it finds a folio that needs uploading. This isn't 181 + * called if writeback only has copy-to-cache to deal with. 183 182 */ 184 - void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len) 183 + void afs_begin_writeback(struct netfs_io_request *wreq) 185 184 { 186 - struct netfs_io_subrequest *subreq; 185 + afs_get_writeback_key(wreq); 186 + wreq->io_streams[0].avail = true; 187 + } 187 188 188 - _enter("%x,%llx-%llx", wreq->debug_id, start, start + len); 189 + /* 190 + * Prepare to retry the writes in request. Use this to try rotating the 191 + * available writeback keys. 192 + */ 193 + void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream) 194 + { 195 + struct netfs_io_subrequest *subreq = 196 + list_first_entry(&stream->subrequests, 197 + struct netfs_io_subrequest, rreq_link); 189 198 190 - subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER, 191 - start, len, afs_upload_to_server_worker); 192 - if (subreq) 193 - netfs_queue_write_request(subreq); 199 + switch (subreq->error) { 200 + case -EACCES: 201 + case -EPERM: 202 + case -ENOKEY: 203 + case -EKEYEXPIRED: 204 + case -EKEYREJECTED: 205 + case -EKEYREVOKED: 206 + afs_get_writeback_key(wreq); 207 + if (!wreq->netfs_priv) 208 + stream->failed = true; 209 + break; 210 + } 194 211 } 195 212 196 213 /*

+75 -1

fs/cachefiles/io.c

··· 9 9 #include <linux/slab.h> 10 10 #include <linux/file.h> 11 11 #include <linux/uio.h> 12 + #include <linux/bio.h> 12 13 #include <linux/falloc.h> 13 14 #include <linux/sched/mm.h> 14 15 #include <trace/events/fscache.h> ··· 494 493 * boundary as appropriate. 495 494 */ 496 495 static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq, 497 - loff_t i_size) 496 + unsigned long long i_size) 498 497 { 499 498 return cachefiles_do_prepare_read(&subreq->rreq->cache_resources, 500 499 subreq->start, &subreq->len, i_size, ··· 623 622 return ret; 624 623 } 625 624 625 + static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq) 626 + { 627 + struct netfs_io_request *wreq = subreq->rreq; 628 + struct netfs_cache_resources *cres = &wreq->cache_resources; 629 + 630 + _enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start); 631 + 632 + subreq->max_len = ULONG_MAX; 633 + subreq->max_nr_segs = BIO_MAX_VECS; 634 + 635 + if (!cachefiles_cres_file(cres)) { 636 + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) 637 + return netfs_prepare_write_failed(subreq); 638 + if (!cachefiles_cres_file(cres)) 639 + return netfs_prepare_write_failed(subreq); 640 + } 641 + } 642 + 643 + static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) 644 + { 645 + struct netfs_io_request *wreq = subreq->rreq; 646 + struct netfs_cache_resources *cres = &wreq->cache_resources; 647 + struct cachefiles_object *object = cachefiles_cres_object(cres); 648 + struct cachefiles_cache *cache = object->volume->cache; 649 + const struct cred *saved_cred; 650 + size_t off, pre, post, len = subreq->len; 651 + loff_t start = subreq->start; 652 + int ret; 653 + 654 + _enter("W=%x[%x] %llx-%llx", 655 + wreq->debug_id, subreq->debug_index, start, start + len - 1); 656 + 657 + /* We need to start on the cache granularity boundary */ 658 + off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1); 659 + if (off) { 660 + pre = 
CACHEFILES_DIO_BLOCK_SIZE - off; 661 + if (pre >= len) { 662 + netfs_write_subrequest_terminated(subreq, len, false); 663 + return; 664 + } 665 + subreq->transferred += pre; 666 + start += pre; 667 + len -= pre; 668 + iov_iter_advance(&subreq->io_iter, pre); 669 + } 670 + 671 + /* We also need to end on the cache granularity boundary */ 672 + post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1); 673 + if (post) { 674 + len -= post; 675 + if (len == 0) { 676 + netfs_write_subrequest_terminated(subreq, post, false); 677 + return; 678 + } 679 + iov_iter_truncate(&subreq->io_iter, len); 680 + } 681 + 682 + cachefiles_begin_secure(cache, &saved_cred); 683 + ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres), 684 + &start, &len, len, true); 685 + cachefiles_end_secure(cache, saved_cred); 686 + if (ret < 0) { 687 + netfs_write_subrequest_terminated(subreq, ret, false); 688 + return; 689 + } 690 + 691 + cachefiles_write(&subreq->rreq->cache_resources, 692 + subreq->start, &subreq->io_iter, 693 + netfs_write_subrequest_terminated, subreq); 694 + } 695 + 626 696 /* 627 697 * Clean up an operation. 628 698 */ ··· 710 638 .end_operation = cachefiles_end_operation, 711 639 .read = cachefiles_read, 712 640 .write = cachefiles_write, 641 + .issue_write = cachefiles_issue_write, 713 642 .prepare_read = cachefiles_prepare_read, 714 643 .prepare_write = cachefiles_prepare_write, 644 + .prepare_write_subreq = cachefiles_prepare_write_subreq, 715 645 .prepare_ondemand_read = cachefiles_prepare_ondemand_read, 716 646 .query_occupancy = cachefiles_query_occupancy, 717 647 };

+4 -20

fs/ceph/addr.c

··· 193 193 * block, but do not exceed the file size, unless the original 194 194 * request already exceeds it. 195 195 */ 196 - new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); 196 + new_end = umin(round_up(end, lo->stripe_unit), rreq->i_size); 197 197 if (new_end > end && new_end <= rreq->start + max_len) 198 198 rreq->len = new_end - rreq->start; 199 199 ··· 498 498 }; 499 499 500 500 #ifdef CONFIG_CEPH_FSCACHE 501 - static void ceph_set_page_fscache(struct page *page) 502 - { 503 - set_page_fscache(page); 504 - } 505 - 506 501 static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) 507 502 { 508 503 struct inode *inode = priv; ··· 512 517 struct fscache_cookie *cookie = ceph_fscache_cookie(ci); 513 518 514 519 fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), 515 - ceph_fscache_write_terminated, inode, caching); 520 + ceph_fscache_write_terminated, inode, true, caching); 516 521 } 517 522 #else 518 - static inline void ceph_set_page_fscache(struct page *page) 519 - { 520 - } 521 - 522 523 static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) 523 524 { 524 525 } ··· 706 715 len = wlen; 707 716 708 717 set_page_writeback(page); 709 - if (caching) 710 - ceph_set_page_fscache(page); 711 718 ceph_fscache_write_to_cache(inode, page_off, len, caching); 712 719 713 720 if (IS_ENCRYPTED(inode)) { ··· 788 799 redirty_page_for_writepage(wbc, page); 789 800 return AOP_WRITEPAGE_ACTIVATE; 790 801 } 791 - 792 - wait_on_page_fscache(page); 793 802 794 803 err = writepage_nounlock(page, wbc); 795 804 if (err == -ERESTARTSYS) { ··· 1062 1075 unlock_page(page); 1063 1076 break; 1064 1077 } 1065 - if (PageWriteback(page) || PageFsCache(page)) { 1078 + if (PageWriteback(page)) { 1066 1079 if (wbc->sync_mode == WB_SYNC_NONE) { 1067 1080 doutc(cl, "%p under writeback\n", page); 1068 1081 unlock_page(page); ··· 1070 1083 } 1071 1084 doutc(cl, "waiting on writeback 
%p\n", page); 1072 1085 wait_on_page_writeback(page); 1073 - wait_on_page_fscache(page); 1074 1086 } 1075 1087 1076 1088 if (!clear_page_dirty_for_io(page)) { ··· 1254 1268 } 1255 1269 1256 1270 set_page_writeback(page); 1257 - if (caching) 1258 - ceph_set_page_fscache(page); 1259 1271 len += thp_size(page); 1260 1272 } 1261 1273 ceph_fscache_write_to_cache(inode, offset, len, caching); ··· 1497 1513 if (r < 0) 1498 1514 return r; 1499 1515 1500 - folio_wait_fscache(folio); 1516 + folio_wait_private_2(folio); /* [DEPRECATED] */ 1501 1517 WARN_ON_ONCE(!folio_test_locked(folio)); 1502 1518 *pagep = &folio->page; 1503 1519 return 0;

+2

fs/ceph/inode.c

··· 577 577 578 578 /* Set parameters for the netfs library */ 579 579 netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); 580 + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ 581 + __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); 580 582 581 583 spin_lock_init(&ci->i_ceph_lock); 582 584

+2 -1

fs/netfs/Makefile

··· 11 11 main.o \ 12 12 misc.o \ 13 13 objects.o \ 14 - output.o 14 + write_collect.o \ 15 + write_issue.o 15 16 16 17 netfs-$(CONFIG_NETFS_STATS) += stats.o 17 18

+26 -14

fs/netfs/buffered_read.c

··· 10 10 #include "internal.h" 11 11 12 12 /* 13 - * Unlock the folios in a read operation. We need to set PG_fscache on any 13 + * Unlock the folios in a read operation. We need to set PG_writeback on any 14 14 * folios we're going to write back before we unlock them. 15 + * 16 + * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use 17 + * PG_private_2 and do a direct write to the cache from here instead. 15 18 */ 16 19 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) 17 20 { ··· 51 48 xas_for_each(&xas, folio, last_page) { 52 49 loff_t pg_end; 53 50 bool pg_failed = false; 54 - bool folio_started; 51 + bool wback_to_cache = false; 52 + bool folio_started = false; 55 53 56 54 if (xas_retry(&xas, folio)) 57 55 continue; 58 56 59 57 pg_end = folio_pos(folio) + folio_size(folio) - 1; 60 58 61 - folio_started = false; 62 59 for (;;) { 63 60 loff_t sreq_end; 64 61 ··· 66 63 pg_failed = true; 67 64 break; 68 65 } 69 - if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { 70 - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 71 - folio_start_fscache(folio); 72 - folio_started = true; 66 + if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { 67 + if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, 68 + &subreq->flags)) { 69 + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 70 + folio_start_private_2(folio); 71 + folio_started = true; 72 + } 73 + } else { 74 + wback_to_cache |= 75 + test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 73 76 } 74 77 pg_failed |= subreq_failed; 75 78 sreq_end = subreq->start + subreq->len - 1; ··· 107 98 kfree(finfo); 108 99 } 109 100 folio_mark_uptodate(folio); 101 + if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) { 102 + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 103 + folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE); 104 + filemap_dirty_folio(folio->mapping, folio); 105 + } 110 106 } 111 107 112 108 if 
(!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { ··· 130 116 } 131 117 132 118 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, 133 - loff_t *_start, size_t *_len, loff_t i_size) 119 + unsigned long long *_start, 120 + unsigned long long *_len, 121 + unsigned long long i_size) 134 122 { 135 123 struct netfs_cache_resources *cres = &rreq->cache_resources; 136 124 ··· 282 266 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 283 267 goto discard; 284 268 285 - netfs_stat(&netfs_n_rh_readpage); 269 + netfs_stat(&netfs_n_rh_read_folio); 286 270 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); 287 271 288 272 /* Set up the output buffer */ ··· 466 450 if (!netfs_is_cache_enabled(ctx) && 467 451 netfs_skip_folio_read(folio, pos, len, false)) { 468 452 netfs_stat(&netfs_n_rh_write_zskip); 469 - goto have_folio_no_wait; 453 + goto have_folio; 470 454 } 471 455 472 456 rreq = netfs_alloc_request(mapping, file, ··· 507 491 netfs_put_request(rreq, false, netfs_rreq_trace_put_return); 508 492 509 493 have_folio: 510 - ret = folio_wait_fscache_killable(folio); 511 - if (ret < 0) 512 - goto error; 513 - have_folio_no_wait: 514 494 *_folio = folio; 515 495 _leave(" = 0"); 516 496 return 0;

+77 -754

fs/netfs/buffered_write.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 - /* Network filesystem high-level write support. 2 + /* Network filesystem high-level buffered write support. 3 3 * 4 4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 5 * Written by David Howells (dhowells@redhat.com) ··· 26 26 NETFS_FLUSH_CONTENT, /* Flush incompatible content. */ 27 27 }; 28 28 29 - static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq); 30 - 31 29 static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) 32 30 { 33 - if (netfs_group && !folio_get_private(folio)) 34 - folio_attach_private(folio, netfs_get_group(netfs_group)); 35 - } 31 + void *priv = folio_get_private(folio); 36 32 37 - #if IS_ENABLED(CONFIG_FSCACHE) 38 - static void netfs_folio_start_fscache(bool caching, struct folio *folio) 39 - { 40 - if (caching) 41 - folio_start_fscache(folio); 33 + if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE)) 34 + folio_attach_private(folio, netfs_get_group(netfs_group)); 35 + else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) 36 + folio_detach_private(folio); 42 37 } 43 - #else 44 - static void netfs_folio_start_fscache(bool caching, struct folio *folio) 45 - { 46 - } 47 - #endif 48 38 49 39 /* 50 40 * Decide how we should modify a folio. 
We might be attempting to do ··· 53 63 bool maybe_trouble) 54 64 { 55 65 struct netfs_folio *finfo = netfs_folio_info(folio); 66 + struct netfs_group *group = netfs_folio_group(folio); 56 67 loff_t pos = folio_file_pos(folio); 57 68 58 69 _enter(""); 59 70 60 - if (netfs_folio_group(folio) != netfs_group) 71 + if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) 61 72 return NETFS_FLUSH_CONTENT; 62 73 63 74 if (folio_test_uptodate(folio)) ··· 72 81 73 82 if (file->f_mode & FMODE_READ) 74 83 goto no_write_streaming; 75 - if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) 76 - goto no_write_streaming; 77 84 78 85 if (netfs_is_cache_enabled(ctx)) { 79 86 /* We don't want to get a streaming write on a file that loses 80 87 * caching service temporarily because the backing store got 81 88 * culled. 82 89 */ 83 - if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags)) 84 - set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags); 85 90 goto no_write_streaming; 86 91 } 87 92 ··· 117 130 mapping_gfp_mask(mapping)); 118 131 } 119 132 133 + /* 134 + * Update i_size and estimate the update to i_blocks to reflect the additional 135 + * data written into the pagecache until we can find out from the server what 136 + * the values actually are. 
137 + */ 138 + static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, 139 + loff_t i_size, loff_t pos, size_t copied) 140 + { 141 + blkcnt_t add; 142 + size_t gap; 143 + 144 + if (ctx->ops->update_i_size) { 145 + ctx->ops->update_i_size(inode, pos); 146 + return; 147 + } 148 + 149 + i_size_write(inode, pos); 150 + #if IS_ENABLED(CONFIG_FSCACHE) 151 + fscache_update_cookie(ctx->cache, NULL, &pos); 152 + #endif 153 + 154 + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); 155 + if (copied > gap) { 156 + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); 157 + 158 + inode->i_blocks = min_t(blkcnt_t, 159 + DIV_ROUND_UP(pos, SECTOR_SIZE), 160 + inode->i_blocks + add); 161 + } 162 + } 163 + 120 164 /** 121 165 * netfs_perform_write - Copy data into the pagecache. 122 166 * @iocb: The operation parameters ··· 178 160 }; 179 161 struct netfs_io_request *wreq = NULL; 180 162 struct netfs_folio *finfo; 181 - struct folio *folio; 163 + struct folio *folio, *writethrough = NULL; 182 164 enum netfs_how_to_modify howto; 183 165 enum netfs_folio_trace trace; 184 166 unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC; ··· 207 189 } 208 190 if (!is_sync_kiocb(iocb)) 209 191 wreq->iocb = iocb; 210 - wreq->cleanup = netfs_cleanup_buffered_write; 192 + netfs_stat(&netfs_n_wh_writethrough); 193 + } else { 194 + netfs_stat(&netfs_n_wh_buffered_write); 211 195 } 212 196 213 197 do { ··· 249 229 flen = folio_size(folio); 250 230 offset = pos & (flen - 1); 251 231 part = min_t(size_t, flen - offset, part); 232 + 233 + /* Wait for writeback to complete. The writeback engine owns 234 + * the info in folio->private and may change it until it 235 + * removes the WB mark. 236 + */ 237 + if (folio_get_private(folio) && 238 + folio_wait_writeback_killable(folio)) { 239 + ret = written ? -EINTR : -ERESTARTSYS; 240 + goto error_folio_unlock; 241 + } 252 242 253 243 if (signal_pending(current)) { 254 244 ret = written ? 
-EINTR : -ERESTARTSYS; ··· 334 304 maybe_trouble = true; 335 305 iov_iter_revert(iter, copied); 336 306 copied = 0; 307 + folio_unlock(folio); 337 308 goto retry; 338 309 } 339 310 netfs_set_group(folio, netfs_group); ··· 382 351 trace_netfs_folio(folio, trace); 383 352 384 353 /* Update the inode size if we moved the EOF marker */ 385 - i_size = i_size_read(inode); 386 354 pos += copied; 387 - if (pos > i_size) { 388 - if (ctx->ops->update_i_size) { 389 - ctx->ops->update_i_size(inode, pos); 390 - } else { 391 - i_size_write(inode, pos); 392 - #if IS_ENABLED(CONFIG_FSCACHE) 393 - fscache_update_cookie(ctx->cache, NULL, &pos); 394 - #endif 395 - } 396 - } 355 + i_size = i_size_read(inode); 356 + if (pos > i_size) 357 + netfs_update_i_size(ctx, inode, i_size, pos, copied); 397 358 written += copied; 398 359 399 360 if (likely(!wreq)) { 400 361 folio_mark_dirty(folio); 362 + folio_unlock(folio); 401 363 } else { 402 - if (folio_test_dirty(folio)) 403 - /* Sigh. mmap. */ 404 - folio_clear_dirty_for_io(folio); 405 - /* We make multiple writes to the folio... 
*/ 406 - if (!folio_test_writeback(folio)) { 407 - folio_wait_fscache(folio); 408 - folio_start_writeback(folio); 409 - folio_start_fscache(folio); 410 - if (wreq->iter.count == 0) 411 - trace_netfs_folio(folio, netfs_folio_trace_wthru); 412 - else 413 - trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); 414 - } 415 - netfs_advance_writethrough(wreq, copied, 416 - offset + copied == flen); 364 + netfs_advance_writethrough(wreq, &wbc, folio, copied, 365 + offset + copied == flen, 366 + &writethrough); 367 + /* Folio unlocked */ 417 368 } 418 369 retry: 419 - folio_unlock(folio); 420 370 folio_put(folio); 421 371 folio = NULL; 422 372 ··· 405 393 } while (iov_iter_count(iter)); 406 394 407 395 out: 396 + if (likely(written) && ctx->ops->post_modify) 397 + ctx->ops->post_modify(inode); 398 + 408 399 if (unlikely(wreq)) { 409 - ret2 = netfs_end_writethrough(wreq, iocb); 400 + ret2 = netfs_end_writethrough(wreq, &wbc, writethrough); 410 401 wbc_detach_inode(&wbc); 411 402 if (ret2 == -EIOCBQUEUED) 412 403 return ret2; ··· 520 505 */ 521 506 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group) 522 507 { 508 + struct netfs_group *group; 523 509 struct folio *folio = page_folio(vmf->page); 524 510 struct file *file = vmf->vma->vm_file; 525 511 struct inode *inode = file_inode(file); 512 + struct netfs_inode *ictx = netfs_inode(inode); 526 513 vm_fault_t ret = VM_FAULT_RETRY; 527 514 int err; 528 515 ··· 532 515 533 516 sb_start_pagefault(inode->i_sb); 534 517 535 - if (folio_wait_writeback_killable(folio)) 536 - goto out; 537 - 538 518 if (folio_lock_killable(folio) < 0) 539 519 goto out; 520 + 521 + if (folio_wait_writeback_killable(folio)) { 522 + ret = VM_FAULT_LOCKED; 523 + goto out; 524 + } 540 525 541 526 /* Can we see a streaming write here? 
*/ 542 527 if (WARN_ON(!folio_test_uptodate(folio))) { ··· 546 527 goto out; 547 528 } 548 529 549 - if (netfs_folio_group(folio) != netfs_group) { 530 + group = netfs_folio_group(folio); 531 + if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { 550 532 folio_unlock(folio); 551 533 err = filemap_fdatawait_range(inode->i_mapping, 552 534 folio_pos(folio), ··· 571 551 trace_netfs_folio(folio, netfs_folio_trace_mkwrite); 572 552 netfs_set_group(folio, netfs_group); 573 553 file_update_time(file); 554 + if (ictx->ops->post_modify) 555 + ictx->ops->post_modify(inode); 574 556 ret = VM_FAULT_LOCKED; 575 557 out: 576 558 sb_end_pagefault(inode->i_sb); 577 559 return ret; 578 560 } 579 561 EXPORT_SYMBOL(netfs_page_mkwrite); 580 - 581 - /* 582 - * Kill all the pages in the given range 583 - */ 584 - static void netfs_kill_pages(struct address_space *mapping, 585 - loff_t start, loff_t len) 586 - { 587 - struct folio *folio; 588 - pgoff_t index = start / PAGE_SIZE; 589 - pgoff_t last = (start + len - 1) / PAGE_SIZE, next; 590 - 591 - _enter("%llx-%llx", start, start + len - 1); 592 - 593 - do { 594 - _debug("kill %lx (to %lx)", index, last); 595 - 596 - folio = filemap_get_folio(mapping, index); 597 - if (IS_ERR(folio)) { 598 - next = index + 1; 599 - continue; 600 - } 601 - 602 - next = folio_next_index(folio); 603 - 604 - trace_netfs_folio(folio, netfs_folio_trace_kill); 605 - folio_clear_uptodate(folio); 606 - if (folio_test_fscache(folio)) 607 - folio_end_fscache(folio); 608 - folio_end_writeback(folio); 609 - folio_lock(folio); 610 - generic_error_remove_folio(mapping, folio); 611 - folio_unlock(folio); 612 - folio_put(folio); 613 - 614 - } while (index = next, index <= last); 615 - 616 - _leave(""); 617 - } 618 - 619 - /* 620 - * Redirty all the pages in a given range. 
621 - */ 622 - static void netfs_redirty_pages(struct address_space *mapping, 623 - loff_t start, loff_t len) 624 - { 625 - struct folio *folio; 626 - pgoff_t index = start / PAGE_SIZE; 627 - pgoff_t last = (start + len - 1) / PAGE_SIZE, next; 628 - 629 - _enter("%llx-%llx", start, start + len - 1); 630 - 631 - do { 632 - _debug("redirty %llx @%llx", len, start); 633 - 634 - folio = filemap_get_folio(mapping, index); 635 - if (IS_ERR(folio)) { 636 - next = index + 1; 637 - continue; 638 - } 639 - 640 - next = folio_next_index(folio); 641 - trace_netfs_folio(folio, netfs_folio_trace_redirty); 642 - filemap_dirty_folio(mapping, folio); 643 - if (folio_test_fscache(folio)) 644 - folio_end_fscache(folio); 645 - folio_end_writeback(folio); 646 - folio_put(folio); 647 - } while (index = next, index <= last); 648 - 649 - balance_dirty_pages_ratelimited(mapping); 650 - 651 - _leave(""); 652 - } 653 - 654 - /* 655 - * Completion of write to server 656 - */ 657 - static void netfs_pages_written_back(struct netfs_io_request *wreq) 658 - { 659 - struct address_space *mapping = wreq->mapping; 660 - struct netfs_folio *finfo; 661 - struct netfs_group *group = NULL; 662 - struct folio *folio; 663 - pgoff_t last; 664 - int gcount = 0; 665 - 666 - XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE); 667 - 668 - _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 669 - 670 - rcu_read_lock(); 671 - 672 - last = (wreq->start + wreq->len - 1) / PAGE_SIZE; 673 - xas_for_each(&xas, folio, last) { 674 - WARN(!folio_test_writeback(folio), 675 - "bad %zx @%llx page %lx %lx\n", 676 - wreq->len, wreq->start, folio->index, last); 677 - 678 - if ((finfo = netfs_folio_info(folio))) { 679 - /* Streaming writes cannot be redirtied whilst under 680 - * writeback, so discard the streaming record. 
681 - */ 682 - folio_detach_private(folio); 683 - group = finfo->netfs_group; 684 - gcount++; 685 - trace_netfs_folio(folio, netfs_folio_trace_clear_s); 686 - kfree(finfo); 687 - } else if ((group = netfs_folio_group(folio))) { 688 - /* Need to detach the group pointer if the page didn't 689 - * get redirtied. If it has been redirtied, then it 690 - * must be within the same group. 691 - */ 692 - if (folio_test_dirty(folio)) { 693 - trace_netfs_folio(folio, netfs_folio_trace_redirtied); 694 - goto end_wb; 695 - } 696 - if (folio_trylock(folio)) { 697 - if (!folio_test_dirty(folio)) { 698 - folio_detach_private(folio); 699 - gcount++; 700 - trace_netfs_folio(folio, netfs_folio_trace_clear_g); 701 - } else { 702 - trace_netfs_folio(folio, netfs_folio_trace_redirtied); 703 - } 704 - folio_unlock(folio); 705 - goto end_wb; 706 - } 707 - 708 - xas_pause(&xas); 709 - rcu_read_unlock(); 710 - folio_lock(folio); 711 - if (!folio_test_dirty(folio)) { 712 - folio_detach_private(folio); 713 - gcount++; 714 - trace_netfs_folio(folio, netfs_folio_trace_clear_g); 715 - } else { 716 - trace_netfs_folio(folio, netfs_folio_trace_redirtied); 717 - } 718 - folio_unlock(folio); 719 - rcu_read_lock(); 720 - } else { 721 - trace_netfs_folio(folio, netfs_folio_trace_clear); 722 - } 723 - end_wb: 724 - if (folio_test_fscache(folio)) 725 - folio_end_fscache(folio); 726 - xas_advance(&xas, folio_next_index(folio) - 1); 727 - folio_end_writeback(folio); 728 - } 729 - 730 - rcu_read_unlock(); 731 - netfs_put_group_many(group, gcount); 732 - _leave(""); 733 - } 734 - 735 - /* 736 - * Deal with the disposition of the folios that are under writeback to close 737 - * out the operation. 
738 - */ 739 - static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq) 740 - { 741 - struct address_space *mapping = wreq->mapping; 742 - 743 - _enter(""); 744 - 745 - switch (wreq->error) { 746 - case 0: 747 - netfs_pages_written_back(wreq); 748 - break; 749 - 750 - default: 751 - pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error); 752 - fallthrough; 753 - case -EACCES: 754 - case -EPERM: 755 - case -ENOKEY: 756 - case -EKEYEXPIRED: 757 - case -EKEYREJECTED: 758 - case -EKEYREVOKED: 759 - case -ENETRESET: 760 - case -EDQUOT: 761 - case -ENOSPC: 762 - netfs_redirty_pages(mapping, wreq->start, wreq->len); 763 - break; 764 - 765 - case -EROFS: 766 - case -EIO: 767 - case -EREMOTEIO: 768 - case -EFBIG: 769 - case -ENOENT: 770 - case -ENOMEDIUM: 771 - case -ENXIO: 772 - netfs_kill_pages(mapping, wreq->start, wreq->len); 773 - break; 774 - } 775 - 776 - if (wreq->error) 777 - mapping_set_error(mapping, wreq->error); 778 - if (wreq->netfs_ops->done) 779 - wreq->netfs_ops->done(wreq); 780 - } 781 - 782 - /* 783 - * Extend the region to be written back to include subsequent contiguously 784 - * dirty pages if possible, but don't sleep while doing so. 785 - * 786 - * If this page holds new content, then we can include filler zeros in the 787 - * writeback. 
788 - */ 789 - static void netfs_extend_writeback(struct address_space *mapping, 790 - struct netfs_group *group, 791 - struct xa_state *xas, 792 - long *_count, 793 - loff_t start, 794 - loff_t max_len, 795 - bool caching, 796 - size_t *_len, 797 - size_t *_top) 798 - { 799 - struct netfs_folio *finfo; 800 - struct folio_batch fbatch; 801 - struct folio *folio; 802 - unsigned int i; 803 - pgoff_t index = (start + *_len) / PAGE_SIZE; 804 - size_t len; 805 - void *priv; 806 - bool stop = true; 807 - 808 - folio_batch_init(&fbatch); 809 - 810 - do { 811 - /* Firstly, we gather up a batch of contiguous dirty pages 812 - * under the RCU read lock - but we can't clear the dirty flags 813 - * there if any of those pages are mapped. 814 - */ 815 - rcu_read_lock(); 816 - 817 - xas_for_each(xas, folio, ULONG_MAX) { 818 - stop = true; 819 - if (xas_retry(xas, folio)) 820 - continue; 821 - if (xa_is_value(folio)) 822 - break; 823 - if (folio->index != index) { 824 - xas_reset(xas); 825 - break; 826 - } 827 - 828 - if (!folio_try_get_rcu(folio)) { 829 - xas_reset(xas); 830 - continue; 831 - } 832 - 833 - /* Has the folio moved or been split? 
*/ 834 - if (unlikely(folio != xas_reload(xas))) { 835 - folio_put(folio); 836 - xas_reset(xas); 837 - break; 838 - } 839 - 840 - if (!folio_trylock(folio)) { 841 - folio_put(folio); 842 - xas_reset(xas); 843 - break; 844 - } 845 - if (!folio_test_dirty(folio) || 846 - folio_test_writeback(folio) || 847 - folio_test_fscache(folio)) { 848 - folio_unlock(folio); 849 - folio_put(folio); 850 - xas_reset(xas); 851 - break; 852 - } 853 - 854 - stop = false; 855 - len = folio_size(folio); 856 - priv = folio_get_private(folio); 857 - if ((const struct netfs_group *)priv != group) { 858 - stop = true; 859 - finfo = netfs_folio_info(folio); 860 - if (finfo->netfs_group != group || 861 - finfo->dirty_offset > 0) { 862 - folio_unlock(folio); 863 - folio_put(folio); 864 - xas_reset(xas); 865 - break; 866 - } 867 - len = finfo->dirty_len; 868 - } 869 - 870 - *_top += folio_size(folio); 871 - index += folio_nr_pages(folio); 872 - *_count -= folio_nr_pages(folio); 873 - *_len += len; 874 - if (*_len >= max_len || *_count <= 0) 875 - stop = true; 876 - 877 - if (!folio_batch_add(&fbatch, folio)) 878 - break; 879 - if (stop) 880 - break; 881 - } 882 - 883 - xas_pause(xas); 884 - rcu_read_unlock(); 885 - 886 - /* Now, if we obtained any folios, we can shift them to being 887 - * writable and mark them for caching. 888 - */ 889 - if (!folio_batch_count(&fbatch)) 890 - break; 891 - 892 - for (i = 0; i < folio_batch_count(&fbatch); i++) { 893 - folio = fbatch.folios[i]; 894 - trace_netfs_folio(folio, netfs_folio_trace_store_plus); 895 - 896 - if (!folio_clear_dirty_for_io(folio)) 897 - BUG(); 898 - folio_start_writeback(folio); 899 - netfs_folio_start_fscache(caching, folio); 900 - folio_unlock(folio); 901 - } 902 - 903 - folio_batch_release(&fbatch); 904 - cond_resched(); 905 - } while (!stop); 906 - } 907 - 908 - /* 909 - * Synchronously write back the locked page and any subsequent non-locked dirty 910 - * pages. 
911 - */ 912 - static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping, 913 - struct writeback_control *wbc, 914 - struct netfs_group *group, 915 - struct xa_state *xas, 916 - struct folio *folio, 917 - unsigned long long start, 918 - unsigned long long end) 919 - { 920 - struct netfs_io_request *wreq; 921 - struct netfs_folio *finfo; 922 - struct netfs_inode *ctx = netfs_inode(mapping->host); 923 - unsigned long long i_size = i_size_read(&ctx->inode); 924 - size_t len, max_len; 925 - bool caching = netfs_is_cache_enabled(ctx); 926 - long count = wbc->nr_to_write; 927 - int ret; 928 - 929 - _enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching); 930 - 931 - wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio), 932 - NETFS_WRITEBACK); 933 - if (IS_ERR(wreq)) { 934 - folio_unlock(folio); 935 - return PTR_ERR(wreq); 936 - } 937 - 938 - if (!folio_clear_dirty_for_io(folio)) 939 - BUG(); 940 - folio_start_writeback(folio); 941 - netfs_folio_start_fscache(caching, folio); 942 - 943 - count -= folio_nr_pages(folio); 944 - 945 - /* Find all consecutive lockable dirty pages that have contiguous 946 - * written regions, stopping when we find a page that is not 947 - * immediately lockable, is not dirty or is missing, or we reach the 948 - * end of the range. 949 - */ 950 - trace_netfs_folio(folio, netfs_folio_trace_store); 951 - 952 - len = wreq->len; 953 - finfo = netfs_folio_info(folio); 954 - if (finfo) { 955 - start += finfo->dirty_offset; 956 - if (finfo->dirty_offset + finfo->dirty_len != len) { 957 - len = finfo->dirty_len; 958 - goto cant_expand; 959 - } 960 - len = finfo->dirty_len; 961 - } 962 - 963 - if (start < i_size) { 964 - /* Trim the write to the EOF; the extra data is ignored. Also 965 - * put an upper limit on the size of a single storedata op. 
966 - */ 967 - max_len = 65536 * 4096; 968 - max_len = min_t(unsigned long long, max_len, end - start + 1); 969 - max_len = min_t(unsigned long long, max_len, i_size - start); 970 - 971 - if (len < max_len) 972 - netfs_extend_writeback(mapping, group, xas, &count, start, 973 - max_len, caching, &len, &wreq->upper_len); 974 - } 975 - 976 - cant_expand: 977 - len = min_t(unsigned long long, len, i_size - start); 978 - 979 - /* We now have a contiguous set of dirty pages, each with writeback 980 - * set; the first page is still locked at this point, but all the rest 981 - * have been unlocked. 982 - */ 983 - folio_unlock(folio); 984 - wreq->start = start; 985 - wreq->len = len; 986 - 987 - if (start < i_size) { 988 - _debug("write back %zx @%llx [%llx]", len, start, i_size); 989 - 990 - /* Speculatively write to the cache. We have to fix this up 991 - * later if the store fails. 992 - */ 993 - wreq->cleanup = netfs_cleanup_buffered_write; 994 - 995 - iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start, 996 - wreq->upper_len); 997 - __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 998 - ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback); 999 - if (ret == 0 || ret == -EIOCBQUEUED) 1000 - wbc->nr_to_write -= len / PAGE_SIZE; 1001 - } else { 1002 - _debug("write discard %zx @%llx [%llx]", len, start, i_size); 1003 - 1004 - /* The dirty region was entirely beyond the EOF. 
*/ 1005 - fscache_clear_page_bits(mapping, start, len, caching); 1006 - netfs_pages_written_back(wreq); 1007 - ret = 0; 1008 - } 1009 - 1010 - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 1011 - _leave(" = 1"); 1012 - return 1; 1013 - } 1014 - 1015 - /* 1016 - * Write a region of pages back to the server 1017 - */ 1018 - static ssize_t netfs_writepages_begin(struct address_space *mapping, 1019 - struct writeback_control *wbc, 1020 - struct netfs_group *group, 1021 - struct xa_state *xas, 1022 - unsigned long long *_start, 1023 - unsigned long long end) 1024 - { 1025 - const struct netfs_folio *finfo; 1026 - struct folio *folio; 1027 - unsigned long long start = *_start; 1028 - ssize_t ret; 1029 - void *priv; 1030 - int skips = 0; 1031 - 1032 - _enter("%llx,%llx,", start, end); 1033 - 1034 - search_again: 1035 - /* Find the first dirty page in the group. */ 1036 - rcu_read_lock(); 1037 - 1038 - for (;;) { 1039 - folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 1040 - if (xas_retry(xas, folio) || xa_is_value(folio)) 1041 - continue; 1042 - if (!folio) 1043 - break; 1044 - 1045 - if (!folio_try_get_rcu(folio)) { 1046 - xas_reset(xas); 1047 - continue; 1048 - } 1049 - 1050 - if (unlikely(folio != xas_reload(xas))) { 1051 - folio_put(folio); 1052 - xas_reset(xas); 1053 - continue; 1054 - } 1055 - 1056 - /* Skip any dirty folio that's not in the group of interest. 
*/ 1057 - priv = folio_get_private(folio); 1058 - if ((const struct netfs_group *)priv != group) { 1059 - finfo = netfs_folio_info(folio); 1060 - if (finfo->netfs_group != group) { 1061 - folio_put(folio); 1062 - continue; 1063 - } 1064 - } 1065 - 1066 - xas_pause(xas); 1067 - break; 1068 - } 1069 - rcu_read_unlock(); 1070 - if (!folio) 1071 - return 0; 1072 - 1073 - start = folio_pos(folio); /* May regress with THPs */ 1074 - 1075 - _debug("wback %lx", folio->index); 1076 - 1077 - /* At this point we hold neither the i_pages lock nor the page lock: 1078 - * the page may be truncated or invalidated (changing page->mapping to 1079 - * NULL), or even swizzled back from swapper_space to tmpfs file 1080 - * mapping 1081 - */ 1082 - lock_again: 1083 - if (wbc->sync_mode != WB_SYNC_NONE) { 1084 - ret = folio_lock_killable(folio); 1085 - if (ret < 0) 1086 - return ret; 1087 - } else { 1088 - if (!folio_trylock(folio)) 1089 - goto search_again; 1090 - } 1091 - 1092 - if (folio->mapping != mapping || 1093 - !folio_test_dirty(folio)) { 1094 - start += folio_size(folio); 1095 - folio_unlock(folio); 1096 - goto search_again; 1097 - } 1098 - 1099 - if (folio_test_writeback(folio) || 1100 - folio_test_fscache(folio)) { 1101 - folio_unlock(folio); 1102 - if (wbc->sync_mode != WB_SYNC_NONE) { 1103 - folio_wait_writeback(folio); 1104 - #ifdef CONFIG_FSCACHE 1105 - folio_wait_fscache(folio); 1106 - #endif 1107 - goto lock_again; 1108 - } 1109 - 1110 - start += folio_size(folio); 1111 - if (wbc->sync_mode == WB_SYNC_NONE) { 1112 - if (skips >= 5 || need_resched()) { 1113 - ret = 0; 1114 - goto out; 1115 - } 1116 - skips++; 1117 - } 1118 - goto search_again; 1119 - } 1120 - 1121 - ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas, 1122 - folio, start, end); 1123 - out: 1124 - if (ret > 0) 1125 - *_start = start + ret; 1126 - _leave(" = %zd [%llx]", ret, *_start); 1127 - return ret; 1128 - } 1129 - 1130 - /* 1131 - * Write a region of pages back to the server 1132 - */ 
1133 - static int netfs_writepages_region(struct address_space *mapping, 1134 - struct writeback_control *wbc, 1135 - struct netfs_group *group, 1136 - unsigned long long *_start, 1137 - unsigned long long end) 1138 - { 1139 - ssize_t ret; 1140 - 1141 - XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 1142 - 1143 - do { 1144 - ret = netfs_writepages_begin(mapping, wbc, group, &xas, 1145 - _start, end); 1146 - if (ret > 0 && wbc->nr_to_write > 0) 1147 - cond_resched(); 1148 - } while (ret > 0 && wbc->nr_to_write > 0); 1149 - 1150 - return ret > 0 ? 0 : ret; 1151 - } 1152 - 1153 - /* 1154 - * write some of the pending data back to the server 1155 - */ 1156 - int netfs_writepages(struct address_space *mapping, 1157 - struct writeback_control *wbc) 1158 - { 1159 - struct netfs_group *group = NULL; 1160 - loff_t start, end; 1161 - int ret; 1162 - 1163 - _enter(""); 1164 - 1165 - /* We have to be careful as we can end up racing with setattr() 1166 - * truncating the pagecache since the caller doesn't take a lock here 1167 - * to prevent it. 
1168 - */ 1169 - 1170 - if (wbc->range_cyclic && mapping->writeback_index) { 1171 - start = mapping->writeback_index * PAGE_SIZE; 1172 - ret = netfs_writepages_region(mapping, wbc, group, 1173 - &start, LLONG_MAX); 1174 - if (ret < 0) 1175 - goto out; 1176 - 1177 - if (wbc->nr_to_write <= 0) { 1178 - mapping->writeback_index = start / PAGE_SIZE; 1179 - goto out; 1180 - } 1181 - 1182 - start = 0; 1183 - end = mapping->writeback_index * PAGE_SIZE; 1184 - mapping->writeback_index = 0; 1185 - ret = netfs_writepages_region(mapping, wbc, group, &start, end); 1186 - if (ret == 0) 1187 - mapping->writeback_index = start / PAGE_SIZE; 1188 - } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 1189 - start = 0; 1190 - ret = netfs_writepages_region(mapping, wbc, group, 1191 - &start, LLONG_MAX); 1192 - if (wbc->nr_to_write > 0 && ret == 0) 1193 - mapping->writeback_index = start / PAGE_SIZE; 1194 - } else { 1195 - start = wbc->range_start; 1196 - ret = netfs_writepages_region(mapping, wbc, group, 1197 - &start, wbc->range_end); 1198 - } 1199 - 1200 - out: 1201 - _leave(" = %d", ret); 1202 - return ret; 1203 - } 1204 - EXPORT_SYMBOL(netfs_writepages); 1205 - 1206 - /* 1207 - * Deal with the disposition of a laundered folio. 1208 - */ 1209 - static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq) 1210 - { 1211 - if (wreq->error) { 1212 - pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error); 1213 - mapping_set_error(wreq->mapping, wreq->error); 1214 - } 1215 - } 1216 - 1217 - /** 1218 - * netfs_launder_folio - Clean up a dirty folio that's being invalidated 1219 - * @folio: The folio to clean 1220 - * 1221 - * This is called to write back a folio that's being invalidated when an inode 1222 - * is getting torn down. Ideally, writepages would be used instead. 
1223 - */ 1224 - int netfs_launder_folio(struct folio *folio) 1225 - { 1226 - struct netfs_io_request *wreq; 1227 - struct address_space *mapping = folio->mapping; 1228 - struct netfs_folio *finfo = netfs_folio_info(folio); 1229 - struct netfs_group *group = netfs_folio_group(folio); 1230 - struct bio_vec bvec; 1231 - unsigned long long i_size = i_size_read(mapping->host); 1232 - unsigned long long start = folio_pos(folio); 1233 - size_t offset = 0, len; 1234 - int ret = 0; 1235 - 1236 - if (finfo) { 1237 - offset = finfo->dirty_offset; 1238 - start += offset; 1239 - len = finfo->dirty_len; 1240 - } else { 1241 - len = folio_size(folio); 1242 - } 1243 - len = min_t(unsigned long long, len, i_size - start); 1244 - 1245 - wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE); 1246 - if (IS_ERR(wreq)) { 1247 - ret = PTR_ERR(wreq); 1248 - goto out; 1249 - } 1250 - 1251 - if (!folio_clear_dirty_for_io(folio)) 1252 - goto out_put; 1253 - 1254 - trace_netfs_folio(folio, netfs_folio_trace_launder); 1255 - 1256 - _debug("launder %llx-%llx", start, start + len - 1); 1257 - 1258 - /* Speculatively write to the cache. We have to fix this up later if 1259 - * the store fails. 1260 - */ 1261 - wreq->cleanup = netfs_cleanup_launder_folio; 1262 - 1263 - bvec_set_folio(&bvec, folio, len, offset); 1264 - iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len); 1265 - __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 1266 - ret = netfs_begin_write(wreq, true, netfs_write_trace_launder); 1267 - 1268 - out_put: 1269 - folio_detach_private(folio); 1270 - netfs_put_group(group); 1271 - kfree(finfo); 1272 - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 1273 - out: 1274 - folio_wait_fscache(folio); 1275 - _leave(" = %d", ret); 1276 - return ret; 1277 - } 1278 - EXPORT_SYMBOL(netfs_launder_folio);

+39 -17

fs/netfs/direct_write.c

··· 34 34 unsigned long long start = iocb->ki_pos; 35 35 unsigned long long end = start + iov_iter_count(iter); 36 36 ssize_t ret, n; 37 + size_t len = iov_iter_count(iter); 37 38 bool async = !is_sync_kiocb(iocb); 38 39 39 40 _enter(""); ··· 47 46 48 47 _debug("uw %llx-%llx", start, end); 49 48 50 - wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, 51 - start, end - start, 52 - iocb->ki_flags & IOCB_DIRECT ? 53 - NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE); 49 + wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, 50 + iocb->ki_flags & IOCB_DIRECT ? 51 + NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE); 54 52 if (IS_ERR(wreq)) 55 53 return PTR_ERR(wreq); 54 + 55 + wreq->io_streams[0].avail = true; 56 + trace_netfs_write(wreq, (iocb->ki_flags & IOCB_DIRECT ? 57 + netfs_write_trace_dio_write : 58 + netfs_write_trace_unbuffered_write)); 56 59 57 60 { 58 61 /* If this is an async op and we're not using a bounce buffer, ··· 68 63 * request. 69 64 */ 70 65 if (async || user_backed_iter(iter)) { 71 - n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0); 66 + n = netfs_extract_user_iter(iter, len, &wreq->iter, 0); 72 67 if (n < 0) { 73 68 ret = n; 74 69 goto out; ··· 76 71 wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec; 77 72 wreq->direct_bv_count = n; 78 73 wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter); 79 - wreq->len = iov_iter_count(&wreq->iter); 80 74 } else { 81 75 wreq->iter = *iter; 82 76 } 83 77 84 78 wreq->io_iter = wreq->iter; 85 79 } 80 + 81 + __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); 86 82 87 83 /* Copy the data into the bounce buffer and encrypt it. */ 88 84 // TODO ··· 93 87 if (async) 94 88 wreq->iocb = iocb; 95 89 wreq->cleanup = netfs_cleanup_dio_write; 96 - ret = netfs_begin_write(wreq, is_sync_kiocb(iocb), 97 - iocb->ki_flags & IOCB_DIRECT ? 
98 - netfs_write_trace_dio_write : 99 - netfs_write_trace_unbuffered_write); 90 + ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter)); 100 91 if (ret < 0) { 101 92 _debug("begin = %zd", ret); 102 93 goto out; ··· 103 100 trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); 104 101 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 105 102 TASK_UNINTERRUPTIBLE); 106 - 103 + smp_rmb(); /* Read error/transferred after RIP flag */ 107 104 ret = wreq->error; 108 - _debug("waited = %zd", ret); 109 105 if (ret == 0) { 110 106 ret = wreq->transferred; 111 107 iocb->ki_pos += ret; ··· 134 132 ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) 135 133 { 136 134 struct file *file = iocb->ki_filp; 137 - struct inode *inode = file->f_mapping->host; 135 + struct address_space *mapping = file->f_mapping; 136 + struct inode *inode = mapping->host; 138 137 struct netfs_inode *ictx = netfs_inode(inode); 139 - unsigned long long end; 140 138 ssize_t ret; 139 + loff_t pos = iocb->ki_pos; 140 + unsigned long long end = pos + iov_iter_count(from) - 1; 141 141 142 - _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); 142 + _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); 143 143 144 144 if (!iov_iter_count(from)) 145 145 return 0; 146 146 147 147 trace_netfs_write_iter(iocb, from); 148 - netfs_stat(&netfs_n_rh_dio_write); 148 + netfs_stat(&netfs_n_wh_dio_write); 149 149 150 150 ret = netfs_start_io_direct(inode); 151 151 if (ret < 0) ··· 161 157 ret = file_update_time(file); 162 158 if (ret < 0) 163 159 goto out; 164 - ret = kiocb_invalidate_pages(iocb, iov_iter_count(from)); 160 + if (iocb->ki_flags & IOCB_NOWAIT) { 161 + /* We could block if there are any pages in the range. 
*/ 162 + ret = -EAGAIN; 163 + if (filemap_range_has_page(mapping, pos, end)) 164 + if (filemap_invalidate_inode(inode, true, pos, end)) 165 + goto out; 166 + } else { 167 + ret = filemap_write_and_wait_range(mapping, pos, end); 168 + if (ret < 0) 169 + goto out; 170 + } 171 + 172 + /* 173 + * After a write we want buffered reads to be sure to go to disk to get 174 + * the new data. We invalidate clean cached page from the region we're 175 + * about to write. We do this *before* the write so that we can return 176 + * without clobbering -EIOCBQUEUED from ->direct_IO(). 177 + */ 178 + ret = filemap_invalidate_inode(inode, true, pos, end); 165 179 if (ret < 0) 166 180 goto out; 167 181 end = iocb->ki_pos + iov_iter_count(from);

+9 -5

fs/netfs/fscache_io.c

··· 166 166 loff_t start; 167 167 size_t len; 168 168 bool set_bits; 169 + bool using_pgpriv2; 169 170 netfs_io_terminated_t term_func; 170 171 void *term_func_priv; 171 172 }; ··· 183 182 184 183 rcu_read_lock(); 185 184 xas_for_each(&xas, page, last) { 186 - end_page_fscache(page); 185 + folio_end_private_2(page_folio(page)); 187 186 } 188 187 rcu_read_unlock(); 189 188 } ··· 198 197 { 199 198 struct fscache_write_request *wreq = priv; 200 199 201 - fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, 202 - wreq->set_bits); 200 + if (wreq->using_pgpriv2) 201 + fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, 202 + wreq->set_bits); 203 203 204 204 if (wreq->term_func) 205 205 wreq->term_func(wreq->term_func_priv, transferred_or_error, ··· 214 212 loff_t start, size_t len, loff_t i_size, 215 213 netfs_io_terminated_t term_func, 216 214 void *term_func_priv, 217 - bool cond) 215 + bool using_pgpriv2, bool cond) 218 216 { 219 217 struct fscache_write_request *wreq; 220 218 struct netfs_cache_resources *cres; ··· 232 230 wreq->mapping = mapping; 233 231 wreq->start = start; 234 232 wreq->len = len; 233 + wreq->using_pgpriv2 = using_pgpriv2; 235 234 wreq->set_bits = cond; 236 235 wreq->term_func = term_func; 237 236 wreq->term_func_priv = term_func_priv; ··· 260 257 abandon_free: 261 258 kfree(wreq); 262 259 abandon: 263 - fscache_clear_page_bits(mapping, start, len, cond); 260 + if (using_pgpriv2) 261 + fscache_clear_page_bits(mapping, start, len, cond); 264 262 if (term_func) 265 263 term_func(term_func_priv, ret, false); 266 264 }

+41 -14

fs/netfs/internal.h

··· 37 37 extern unsigned int netfs_debug; 38 38 extern struct list_head netfs_io_requests; 39 39 extern spinlock_t netfs_proc_lock; 40 + extern mempool_t netfs_request_pool; 41 + extern mempool_t netfs_subrequest_pool; 40 42 41 43 #ifdef CONFIG_PROC_FS 42 44 static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) ··· 93 91 } 94 92 95 93 /* 96 - * output.c 97 - */ 98 - int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait, 99 - enum netfs_write_trace what); 100 - struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); 101 - int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end); 102 - int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb); 103 - 104 - /* 105 94 * stats.c 106 95 */ 107 96 #ifdef CONFIG_NETFS_STATS 108 97 extern atomic_t netfs_n_rh_dio_read; 109 - extern atomic_t netfs_n_rh_dio_write; 110 98 extern atomic_t netfs_n_rh_readahead; 111 - extern atomic_t netfs_n_rh_readpage; 99 + extern atomic_t netfs_n_rh_read_folio; 112 100 extern atomic_t netfs_n_rh_rreq; 113 101 extern atomic_t netfs_n_rh_sreq; 114 102 extern atomic_t netfs_n_rh_download; ··· 115 123 extern atomic_t netfs_n_rh_write_done; 116 124 extern atomic_t netfs_n_rh_write_failed; 117 125 extern atomic_t netfs_n_rh_write_zskip; 126 + extern atomic_t netfs_n_wh_buffered_write; 127 + extern atomic_t netfs_n_wh_writethrough; 128 + extern atomic_t netfs_n_wh_dio_write; 129 + extern atomic_t netfs_n_wh_writepages; 118 130 extern atomic_t netfs_n_wh_wstream_conflict; 119 131 extern atomic_t netfs_n_wh_upload; 120 132 extern atomic_t netfs_n_wh_upload_done; ··· 145 149 #endif 146 150 147 151 /* 152 + * write_collect.c 153 + */ 154 + int netfs_folio_written_back(struct folio *folio); 155 + void netfs_write_collection_worker(struct work_struct *work); 156 + void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async); 157 + 158 + /* 159 + * write_issue.c 160 + */ 161 + 
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, 162 + struct file *file, 163 + loff_t start, 164 + enum netfs_io_origin origin); 165 + void netfs_reissue_write(struct netfs_io_stream *stream, 166 + struct netfs_io_subrequest *subreq); 167 + int netfs_advance_write(struct netfs_io_request *wreq, 168 + struct netfs_io_stream *stream, 169 + loff_t start, size_t len, bool to_eof); 170 + struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); 171 + int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 172 + struct folio *folio, size_t copied, bool to_page_end, 173 + struct folio **writethrough_cache); 174 + int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 175 + struct folio *writethrough_cache); 176 + int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); 177 + 178 + /* 148 179 * Miscellaneous functions. 149 180 */ 150 181 static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx) ··· 191 168 */ 192 169 static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group) 193 170 { 194 - if (netfs_group) 171 + if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE) 195 172 refcount_inc(&netfs_group->ref); 196 173 return netfs_group; 197 174 } ··· 201 178 */ 202 179 static inline void netfs_put_group(struct netfs_group *netfs_group) 203 180 { 204 - if (netfs_group && refcount_dec_and_test(&netfs_group->ref)) 181 + if (netfs_group && 182 + netfs_group != NETFS_FOLIO_COPY_TO_CACHE && 183 + refcount_dec_and_test(&netfs_group->ref)) 205 184 netfs_group->free(netfs_group); 206 185 } 207 186 ··· 212 187 */ 213 188 static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) 214 189 { 215 - if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref)) 190 + if (netfs_group && 191 + netfs_group != NETFS_FOLIO_COPY_TO_CACHE && 192 + refcount_sub_and_test(nr, 
&netfs_group->ref)) 216 193 netfs_group->free(netfs_group); 217 194 } 218 195

+11 -151

fs/netfs/io.c

··· 99 99 } 100 100 101 101 /* 102 - * Deal with the completion of writing the data to the cache. We have to clear 103 - * the PG_fscache bits on the folios involved and release the caller's ref. 104 - * 105 - * May be called in softirq mode and we inherit a ref from the caller. 106 - */ 107 - static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, 108 - bool was_async) 109 - { 110 - struct netfs_io_subrequest *subreq; 111 - struct folio *folio; 112 - pgoff_t unlocked = 0; 113 - bool have_unlocked = false; 114 - 115 - rcu_read_lock(); 116 - 117 - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 118 - XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); 119 - 120 - xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { 121 - if (xas_retry(&xas, folio)) 122 - continue; 123 - 124 - /* We might have multiple writes from the same huge 125 - * folio, but we mustn't unlock a folio more than once. 126 - */ 127 - if (have_unlocked && folio->index <= unlocked) 128 - continue; 129 - unlocked = folio_next_index(folio) - 1; 130 - trace_netfs_folio(folio, netfs_folio_trace_end_copy); 131 - folio_end_fscache(folio); 132 - have_unlocked = true; 133 - } 134 - } 135 - 136 - rcu_read_unlock(); 137 - netfs_rreq_completed(rreq, was_async); 138 - } 139 - 140 - static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, 141 - bool was_async) 142 - { 143 - struct netfs_io_subrequest *subreq = priv; 144 - struct netfs_io_request *rreq = subreq->rreq; 145 - 146 - if (IS_ERR_VALUE(transferred_or_error)) { 147 - netfs_stat(&netfs_n_rh_write_failed); 148 - trace_netfs_failure(rreq, subreq, transferred_or_error, 149 - netfs_fail_copy_to_cache); 150 - } else { 151 - netfs_stat(&netfs_n_rh_write_done); 152 - } 153 - 154 - trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); 155 - 156 - /* If we decrement nr_copy_ops to 0, the ref belongs to us. 
*/ 157 - if (atomic_dec_and_test(&rreq->nr_copy_ops)) 158 - netfs_rreq_unmark_after_write(rreq, was_async); 159 - 160 - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 161 - } 162 - 163 - /* 164 - * Perform any outstanding writes to the cache. We inherit a ref from the 165 - * caller. 166 - */ 167 - static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) 168 - { 169 - struct netfs_cache_resources *cres = &rreq->cache_resources; 170 - struct netfs_io_subrequest *subreq, *next, *p; 171 - struct iov_iter iter; 172 - int ret; 173 - 174 - trace_netfs_rreq(rreq, netfs_rreq_trace_copy); 175 - 176 - /* We don't want terminating writes trying to wake us up whilst we're 177 - * still going through the list. 178 - */ 179 - atomic_inc(&rreq->nr_copy_ops); 180 - 181 - list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { 182 - if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { 183 - list_del_init(&subreq->rreq_link); 184 - netfs_put_subrequest(subreq, false, 185 - netfs_sreq_trace_put_no_copy); 186 - } 187 - } 188 - 189 - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 190 - /* Amalgamate adjacent writes */ 191 - while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 192 - next = list_next_entry(subreq, rreq_link); 193 - if (next->start != subreq->start + subreq->len) 194 - break; 195 - subreq->len += next->len; 196 - list_del_init(&next->rreq_link); 197 - netfs_put_subrequest(next, false, 198 - netfs_sreq_trace_put_merged); 199 - } 200 - 201 - ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, 202 - subreq->len, rreq->i_size, true); 203 - if (ret < 0) { 204 - trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); 205 - trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); 206 - continue; 207 - } 208 - 209 - iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, 210 - subreq->start, subreq->len); 211 - 212 - atomic_inc(&rreq->nr_copy_ops); 213 - 
netfs_stat(&netfs_n_rh_write); 214 - netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); 215 - trace_netfs_sreq(subreq, netfs_sreq_trace_write); 216 - cres->ops->write(cres, subreq->start, &iter, 217 - netfs_rreq_copy_terminated, subreq); 218 - } 219 - 220 - /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ 221 - if (atomic_dec_and_test(&rreq->nr_copy_ops)) 222 - netfs_rreq_unmark_after_write(rreq, false); 223 - } 224 - 225 - static void netfs_rreq_write_to_cache_work(struct work_struct *work) 226 - { 227 - struct netfs_io_request *rreq = 228 - container_of(work, struct netfs_io_request, work); 229 - 230 - netfs_rreq_do_write_to_cache(rreq); 231 - } 232 - 233 - static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) 234 - { 235 - rreq->work.func = netfs_rreq_write_to_cache_work; 236 - if (!queue_work(system_unbound_wq, &rreq->work)) 237 - BUG(); 238 - } 239 - 240 - /* 241 102 * Handle a short read. 242 103 */ 243 104 static void netfs_rreq_short_read(struct netfs_io_request *rreq, ··· 213 352 unsigned int i; 214 353 size_t transferred = 0; 215 354 216 - for (i = 0; i < rreq->direct_bv_count; i++) 355 + for (i = 0; i < rreq->direct_bv_count; i++) { 217 356 flush_dcache_page(rreq->direct_bv[i].bv_page); 357 + // TODO: cifs marks pages in the destination buffer 358 + // dirty under some circumstances after a read. Do we 359 + // need to do that too? 
360 + set_page_dirty(rreq->direct_bv[i].bv_page); 361 + } 218 362 219 363 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 220 364 if (subreq->error || subreq->transferred == 0) ··· 274 408 trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); 275 409 clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); 276 410 wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); 277 - 278 - if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) 279 - return netfs_rreq_write_to_cache(rreq); 280 411 281 412 netfs_rreq_completed(rreq, was_async); 282 413 } ··· 481 618 482 619 set: 483 620 if (subreq->len > rreq->len) 484 - pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n", 621 + pr_warn("R=%08x[%u] SREQ>RREQ %zx > %llx\n", 485 622 rreq->debug_id, subreq->debug_index, 486 623 subreq->len, rreq->len); 487 624 ··· 506 643 * Slice off a piece of a read request and submit an I/O request for it. 507 644 */ 508 645 static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, 509 - struct iov_iter *io_iter, 510 - unsigned int *_debug_index) 646 + struct iov_iter *io_iter) 511 647 { 512 648 struct netfs_io_subrequest *subreq; 513 649 enum netfs_io_source source; ··· 515 653 if (!subreq) 516 654 return false; 517 655 518 - subreq->debug_index = (*_debug_index)++; 519 656 subreq->start = rreq->start + rreq->submitted; 520 657 subreq->len = io_iter->count; 521 658 522 - _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted); 659 + _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); 523 660 list_add_tail(&subreq->rreq_link, &rreq->subrequests); 524 661 525 662 /* Call out to the cache to find out what it can do with the remaining ··· 568 707 int netfs_begin_read(struct netfs_io_request *rreq, bool sync) 569 708 { 570 709 struct iov_iter io_iter; 571 - unsigned int debug_index = 0; 572 710 int ret; 573 711 574 712 _enter("R=%x %llx-%llx", ··· 593 733 atomic_set(&rreq->nr_outstanding, 1); 594 734 io_iter = rreq->io_iter; 595 735 do { 596 - 
_debug("submit %llx + %zx >= %llx", 736 + _debug("submit %llx + %llx >= %llx", 597 737 rreq->start, rreq->submitted, rreq->i_size); 598 738 if (rreq->origin == NETFS_DIO_READ && 599 739 rreq->start + rreq->submitted >= rreq->i_size) 600 740 break; 601 - if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index)) 741 + if (!netfs_rreq_submit_slice(rreq, &io_iter)) 602 742 break; 603 743 if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && 604 744 test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))

+47 -8

fs/netfs/main.c

··· 7 7 8 8 #include <linux/module.h> 9 9 #include <linux/export.h> 10 + #include <linux/mempool.h> 10 11 #include <linux/proc_fs.h> 11 12 #include <linux/seq_file.h> 12 13 #include "internal.h" ··· 24 23 module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); 25 24 MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); 26 25 26 + static struct kmem_cache *netfs_request_slab; 27 + static struct kmem_cache *netfs_subrequest_slab; 28 + mempool_t netfs_request_pool; 29 + mempool_t netfs_subrequest_pool; 30 + 27 31 #ifdef CONFIG_PROC_FS 28 32 LIST_HEAD(netfs_io_requests); 29 33 DEFINE_SPINLOCK(netfs_proc_lock); ··· 37 31 [NETFS_READAHEAD] = "RA", 38 32 [NETFS_READPAGE] = "RP", 39 33 [NETFS_READ_FOR_WRITE] = "RW", 34 + [NETFS_COPY_TO_CACHE] = "CC", 40 35 [NETFS_WRITEBACK] = "WB", 41 36 [NETFS_WRITETHROUGH] = "WT", 42 - [NETFS_LAUNDER_WRITE] = "LW", 43 37 [NETFS_UNBUFFERED_WRITE] = "UW", 44 38 [NETFS_DIO_READ] = "DR", 45 39 [NETFS_DIO_WRITE] = "DW", ··· 62 56 63 57 rreq = list_entry(v, struct netfs_io_request, proc_link); 64 58 seq_printf(m, 65 - "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx", 59 + "%08x %s %3d %2lx %4d %3d @%04llx %llx/%llx", 66 60 rreq->debug_id, 67 61 netfs_origins[rreq->origin], 68 62 refcount_read(&rreq->ref), ··· 104 98 { 105 99 int ret = -ENOMEM; 106 100 101 + netfs_request_slab = kmem_cache_create("netfs_request", 102 + sizeof(struct netfs_io_request), 0, 103 + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 104 + NULL); 105 + if (!netfs_request_slab) 106 + goto error_req; 107 + 108 + if (mempool_init_slab_pool(&netfs_request_pool, 100, netfs_request_slab) < 0) 109 + goto error_reqpool; 110 + 111 + netfs_subrequest_slab = kmem_cache_create("netfs_subrequest", 112 + sizeof(struct netfs_io_subrequest), 0, 113 + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 114 + NULL); 115 + if (!netfs_subrequest_slab) 116 + goto error_subreq; 117 + 118 + if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0) 119 + goto error_subreqpool; 120 + 107 
121 if (!proc_mkdir("fs/netfs", NULL)) 108 - goto error; 122 + goto error_proc; 109 123 if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL, 110 124 &netfs_requests_seq_ops)) 111 - goto error_proc; 125 + goto error_procfile; 112 126 #ifdef CONFIG_FSCACHE_STATS 113 127 if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, 114 128 netfs_stats_show)) 115 - goto error_proc; 129 + goto error_procfile; 116 130 #endif 117 131 118 132 ret = fscache_init(); 119 133 if (ret < 0) 120 - goto error_proc; 134 + goto error_fscache; 121 135 return 0; 122 136 123 - error_proc: 137 + error_fscache: 138 + error_procfile: 124 139 remove_proc_entry("fs/netfs", NULL); 125 - error: 140 + error_proc: 141 + mempool_exit(&netfs_subrequest_pool); 142 + error_subreqpool: 143 + kmem_cache_destroy(netfs_subrequest_slab); 144 + error_subreq: 145 + mempool_exit(&netfs_request_pool); 146 + error_reqpool: 147 + kmem_cache_destroy(netfs_request_slab); 148 + error_req: 126 149 return ret; 127 150 } 128 151 fs_initcall(netfs_init); ··· 160 125 { 161 126 fscache_exit(); 162 127 remove_proc_entry("fs/netfs", NULL); 128 + mempool_exit(&netfs_subrequest_pool); 129 + kmem_cache_destroy(netfs_subrequest_slab); 130 + mempool_exit(&netfs_request_pool); 131 + kmem_cache_destroy(netfs_request_slab); 163 132 } 164 133 module_exit(netfs_exit);

+1 -9

fs/netfs/misc.c

··· 177 177 */ 178 178 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) 179 179 { 180 - struct netfs_folio *finfo = NULL; 180 + struct netfs_folio *finfo; 181 181 size_t flen = folio_size(folio); 182 182 183 183 _enter("{%lx},%zx,%zx", folio->index, offset, length); 184 - 185 - folio_wait_fscache(folio); 186 184 187 185 if (!folio_test_private(folio)) 188 186 return; ··· 246 248 247 249 if (folio_test_private(folio)) 248 250 return false; 249 - if (folio_test_fscache(folio)) { 250 - if (current_is_kswapd() || !(gfp & __GFP_FS)) 251 - return false; 252 - folio_wait_fscache(folio); 253 - } 254 - 255 251 fscache_note_page_release(netfs_i_cookie(ctx)); 256 252 return true; 257 253 }

+60 -19

fs/netfs/objects.c

··· 6 6 */ 7 7 8 8 #include <linux/slab.h> 9 + #include <linux/mempool.h> 10 + #include <linux/delay.h> 9 11 #include "internal.h" 10 12 11 13 /* ··· 22 20 struct inode *inode = file ? file_inode(file) : mapping->host; 23 21 struct netfs_inode *ctx = netfs_inode(inode); 24 22 struct netfs_io_request *rreq; 23 + mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; 24 + struct kmem_cache *cache = mempool->pool_data; 25 25 bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || 26 26 origin == NETFS_DIO_READ || 27 27 origin == NETFS_DIO_WRITE); 28 28 bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); 29 29 int ret; 30 30 31 - rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request), 32 - GFP_KERNEL); 33 - if (!rreq) 34 - return ERR_PTR(-ENOMEM); 31 + for (;;) { 32 + rreq = mempool_alloc(mempool, GFP_KERNEL); 33 + if (rreq) 34 + break; 35 + msleep(10); 36 + } 35 37 38 + memset(rreq, 0, kmem_cache_size(cache)); 36 39 rreq->start = start; 37 40 rreq->len = len; 38 41 rreq->upper_len = len; ··· 47 40 rreq->inode = inode; 48 41 rreq->i_size = i_size_read(inode); 49 42 rreq->debug_id = atomic_inc_return(&debug_ids); 43 + rreq->wsize = INT_MAX; 44 + spin_lock_init(&rreq->lock); 45 + INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); 46 + INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); 50 47 INIT_LIST_HEAD(&rreq->subrequests); 51 48 INIT_WORK(&rreq->work, NULL); 52 49 refcount_set(&rreq->ref, 1); 53 50 54 51 __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); 55 - if (cached) 52 + if (cached) { 56 53 __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); 54 + if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) 55 + /* Filesystem uses deprecated PG_private_2 marking. 
*/ 56 + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); 57 + } 57 58 if (file && file->f_flags & O_NONBLOCK) 58 59 __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); 59 60 if (rreq->netfs_ops->init_request) { 60 61 ret = rreq->netfs_ops->init_request(rreq, file); 61 62 if (ret < 0) { 62 - kfree(rreq); 63 + mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool); 63 64 return ERR_PTR(ret); 64 65 } 65 66 } ··· 89 74 void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) 90 75 { 91 76 struct netfs_io_subrequest *subreq; 77 + struct netfs_io_stream *stream; 78 + int s; 92 79 93 80 while (!list_empty(&rreq->subrequests)) { 94 81 subreq = list_first_entry(&rreq->subrequests, ··· 99 82 netfs_put_subrequest(subreq, was_async, 100 83 netfs_sreq_trace_put_clear); 101 84 } 85 + 86 + for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) { 87 + stream = &rreq->io_streams[s]; 88 + while (!list_empty(&stream->subrequests)) { 89 + subreq = list_first_entry(&stream->subrequests, 90 + struct netfs_io_subrequest, rreq_link); 91 + list_del(&subreq->rreq_link); 92 + netfs_put_subrequest(subreq, was_async, 93 + netfs_sreq_trace_put_clear); 94 + } 95 + } 96 + } 97 + 98 + static void netfs_free_request_rcu(struct rcu_head *rcu) 99 + { 100 + struct netfs_io_request *rreq = container_of(rcu, struct netfs_io_request, rcu); 101 + 102 + mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool); 103 + netfs_stat_d(&netfs_n_rh_rreq); 102 104 } 103 105 104 106 static void netfs_free_request(struct work_struct *work) ··· 142 106 } 143 107 kvfree(rreq->direct_bv); 144 108 } 145 - kfree_rcu(rreq, rcu); 146 - netfs_stat_d(&netfs_n_rh_rreq); 109 + call_rcu(&rreq->rcu, netfs_free_request_rcu); 147 110 } 148 111 149 112 void netfs_put_request(struct netfs_io_request *rreq, bool was_async, ··· 174 139 struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq) 175 140 { 176 141 struct netfs_io_subrequest *subreq; 142 + mempool_t *mempool 
= rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool; 143 + struct kmem_cache *cache = mempool->pool_data; 177 144 178 - subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?: 179 - sizeof(struct netfs_io_subrequest), 180 - GFP_KERNEL); 181 - if (subreq) { 182 - INIT_WORK(&subreq->work, NULL); 183 - INIT_LIST_HEAD(&subreq->rreq_link); 184 - refcount_set(&subreq->ref, 2); 185 - subreq->rreq = rreq; 186 - netfs_get_request(rreq, netfs_rreq_trace_get_subreq); 187 - netfs_stat(&netfs_n_rh_sreq); 145 + for (;;) { 146 + subreq = mempool_alloc(rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool, 147 + GFP_KERNEL); 148 + if (subreq) 149 + break; 150 + msleep(10); 188 151 } 189 152 153 + memset(subreq, 0, kmem_cache_size(cache)); 154 + INIT_WORK(&subreq->work, NULL); 155 + INIT_LIST_HEAD(&subreq->rreq_link); 156 + refcount_set(&subreq->ref, 2); 157 + subreq->rreq = rreq; 158 + subreq->debug_index = atomic_inc_return(&rreq->subreq_counter); 159 + netfs_get_request(rreq, netfs_rreq_trace_get_subreq); 160 + netfs_stat(&netfs_n_rh_sreq); 190 161 return subreq; 191 162 } 192 163 ··· 214 173 trace_netfs_sreq(subreq, netfs_sreq_trace_free); 215 174 if (rreq->netfs_ops->free_subrequest) 216 175 rreq->netfs_ops->free_subrequest(subreq); 217 - kfree(subreq); 176 + mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool); 218 177 netfs_stat_d(&netfs_n_rh_sreq); 219 178 netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); 220 179 }

-478

fs/netfs/output.c

··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Network filesystem high-level write support. 3 - * 4 - * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 - * Written by David Howells (dhowells@redhat.com) 6 - */ 7 - 8 - #include <linux/fs.h> 9 - #include <linux/mm.h> 10 - #include <linux/pagemap.h> 11 - #include <linux/slab.h> 12 - #include <linux/writeback.h> 13 - #include <linux/pagevec.h> 14 - #include "internal.h" 15 - 16 - /** 17 - * netfs_create_write_request - Create a write operation. 18 - * @wreq: The write request this is storing from. 19 - * @dest: The destination type 20 - * @start: Start of the region this write will modify 21 - * @len: Length of the modification 22 - * @worker: The worker function to handle the write(s) 23 - * 24 - * Allocate a write operation, set it up and add it to the list on a write 25 - * request. 26 - */ 27 - struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq, 28 - enum netfs_io_source dest, 29 - loff_t start, size_t len, 30 - work_func_t worker) 31 - { 32 - struct netfs_io_subrequest *subreq; 33 - 34 - subreq = netfs_alloc_subrequest(wreq); 35 - if (subreq) { 36 - INIT_WORK(&subreq->work, worker); 37 - subreq->source = dest; 38 - subreq->start = start; 39 - subreq->len = len; 40 - subreq->debug_index = wreq->subreq_counter++; 41 - 42 - switch (subreq->source) { 43 - case NETFS_UPLOAD_TO_SERVER: 44 - netfs_stat(&netfs_n_wh_upload); 45 - break; 46 - case NETFS_WRITE_TO_CACHE: 47 - netfs_stat(&netfs_n_wh_write); 48 - break; 49 - default: 50 - BUG(); 51 - } 52 - 53 - subreq->io_iter = wreq->io_iter; 54 - iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start); 55 - iov_iter_truncate(&subreq->io_iter, subreq->len); 56 - 57 - trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 58 - refcount_read(&subreq->ref), 59 - netfs_sreq_trace_new); 60 - atomic_inc(&wreq->nr_outstanding); 61 - list_add_tail(&subreq->rreq_link, &wreq->subrequests); 62 - trace_netfs_sreq(subreq, 
netfs_sreq_trace_prepare); 63 - } 64 - 65 - return subreq; 66 - } 67 - EXPORT_SYMBOL(netfs_create_write_request); 68 - 69 - /* 70 - * Process a completed write request once all the component operations have 71 - * been completed. 72 - */ 73 - static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async) 74 - { 75 - struct netfs_io_subrequest *subreq; 76 - struct netfs_inode *ctx = netfs_inode(wreq->inode); 77 - size_t transferred = 0; 78 - 79 - _enter("R=%x[]", wreq->debug_id); 80 - 81 - trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); 82 - 83 - list_for_each_entry(subreq, &wreq->subrequests, rreq_link) { 84 - if (subreq->error || subreq->transferred == 0) 85 - break; 86 - transferred += subreq->transferred; 87 - if (subreq->transferred < subreq->len) 88 - break; 89 - } 90 - wreq->transferred = transferred; 91 - 92 - list_for_each_entry(subreq, &wreq->subrequests, rreq_link) { 93 - if (!subreq->error) 94 - continue; 95 - switch (subreq->source) { 96 - case NETFS_UPLOAD_TO_SERVER: 97 - /* Depending on the type of failure, this may prevent 98 - * writeback completion unless we're in disconnected 99 - * mode. 100 - */ 101 - if (!wreq->error) 102 - wreq->error = subreq->error; 103 - break; 104 - 105 - case NETFS_WRITE_TO_CACHE: 106 - /* Failure doesn't prevent writeback completion unless 107 - * we're in disconnected mode. 
108 - */ 109 - if (subreq->error != -ENOBUFS) 110 - ctx->ops->invalidate_cache(wreq); 111 - break; 112 - 113 - default: 114 - WARN_ON_ONCE(1); 115 - if (!wreq->error) 116 - wreq->error = -EIO; 117 - return; 118 - } 119 - } 120 - 121 - wreq->cleanup(wreq); 122 - 123 - if (wreq->origin == NETFS_DIO_WRITE && 124 - wreq->mapping->nrpages) { 125 - pgoff_t first = wreq->start >> PAGE_SHIFT; 126 - pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; 127 - invalidate_inode_pages2_range(wreq->mapping, first, last); 128 - } 129 - 130 - if (wreq->origin == NETFS_DIO_WRITE) 131 - inode_dio_end(wreq->inode); 132 - 133 - _debug("finished"); 134 - trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); 135 - clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); 136 - wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); 137 - 138 - if (wreq->iocb) { 139 - wreq->iocb->ki_pos += transferred; 140 - if (wreq->iocb->ki_complete) 141 - wreq->iocb->ki_complete( 142 - wreq->iocb, wreq->error ? wreq->error : transferred); 143 - } 144 - 145 - netfs_clear_subrequests(wreq, was_async); 146 - netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete); 147 - } 148 - 149 - /* 150 - * Deal with the completion of writing the data to the cache. 
151 - */ 152 - void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 153 - bool was_async) 154 - { 155 - struct netfs_io_subrequest *subreq = _op; 156 - struct netfs_io_request *wreq = subreq->rreq; 157 - unsigned int u; 158 - 159 - _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 160 - 161 - switch (subreq->source) { 162 - case NETFS_UPLOAD_TO_SERVER: 163 - netfs_stat(&netfs_n_wh_upload_done); 164 - break; 165 - case NETFS_WRITE_TO_CACHE: 166 - netfs_stat(&netfs_n_wh_write_done); 167 - break; 168 - case NETFS_INVALID_WRITE: 169 - break; 170 - default: 171 - BUG(); 172 - } 173 - 174 - if (IS_ERR_VALUE(transferred_or_error)) { 175 - subreq->error = transferred_or_error; 176 - trace_netfs_failure(wreq, subreq, transferred_or_error, 177 - netfs_fail_write); 178 - goto failed; 179 - } 180 - 181 - if (WARN(transferred_or_error > subreq->len - subreq->transferred, 182 - "Subreq excess write: R%x[%x] %zd > %zu - %zu", 183 - wreq->debug_id, subreq->debug_index, 184 - transferred_or_error, subreq->len, subreq->transferred)) 185 - transferred_or_error = subreq->len - subreq->transferred; 186 - 187 - subreq->error = 0; 188 - subreq->transferred += transferred_or_error; 189 - 190 - if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred) 191 - pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n", 192 - wreq->debug_id, subreq->debug_index, 193 - iov_iter_count(&subreq->io_iter), subreq->len, 194 - subreq->transferred, subreq->io_iter.iter_type); 195 - 196 - if (subreq->transferred < subreq->len) 197 - goto incomplete; 198 - 199 - __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 200 - out: 201 - trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 202 - 203 - /* If we decrement nr_outstanding to 0, the ref belongs to us. 
*/ 204 - u = atomic_dec_return(&wreq->nr_outstanding); 205 - if (u == 0) 206 - netfs_write_terminated(wreq, was_async); 207 - else if (u == 1) 208 - wake_up_var(&wreq->nr_outstanding); 209 - 210 - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 211 - return; 212 - 213 - incomplete: 214 - if (transferred_or_error == 0) { 215 - if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { 216 - subreq->error = -ENODATA; 217 - goto failed; 218 - } 219 - } else { 220 - __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags); 221 - } 222 - 223 - __set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags); 224 - set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags); 225 - goto out; 226 - 227 - failed: 228 - switch (subreq->source) { 229 - case NETFS_WRITE_TO_CACHE: 230 - netfs_stat(&netfs_n_wh_write_failed); 231 - set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags); 232 - break; 233 - case NETFS_UPLOAD_TO_SERVER: 234 - netfs_stat(&netfs_n_wh_upload_failed); 235 - set_bit(NETFS_RREQ_FAILED, &wreq->flags); 236 - wreq->error = subreq->error; 237 - break; 238 - default: 239 - break; 240 - } 241 - goto out; 242 - } 243 - EXPORT_SYMBOL(netfs_write_subrequest_terminated); 244 - 245 - static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq) 246 - { 247 - struct netfs_io_request *wreq = subreq->rreq; 248 - struct netfs_cache_resources *cres = &wreq->cache_resources; 249 - 250 - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 251 - 252 - cres->ops->write(cres, subreq->start, &subreq->io_iter, 253 - netfs_write_subrequest_terminated, subreq); 254 - } 255 - 256 - static void netfs_write_to_cache_op_worker(struct work_struct *work) 257 - { 258 - struct netfs_io_subrequest *subreq = 259 - container_of(work, struct netfs_io_subrequest, work); 260 - 261 - netfs_write_to_cache_op(subreq); 262 - } 263 - 264 - /** 265 - * netfs_queue_write_request - Queue a write request for attention 266 - * @subreq: The write request to be queued 267 - * 268 - * Queue the specified 
write request for processing by a worker thread. We 269 - * pass the caller's ref on the request to the worker thread. 270 - */ 271 - void netfs_queue_write_request(struct netfs_io_subrequest *subreq) 272 - { 273 - if (!queue_work(system_unbound_wq, &subreq->work)) 274 - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip); 275 - } 276 - EXPORT_SYMBOL(netfs_queue_write_request); 277 - 278 - /* 279 - * Set up a op for writing to the cache. 280 - */ 281 - static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq) 282 - { 283 - struct netfs_cache_resources *cres = &wreq->cache_resources; 284 - struct netfs_io_subrequest *subreq; 285 - struct netfs_inode *ctx = netfs_inode(wreq->inode); 286 - struct fscache_cookie *cookie = netfs_i_cookie(ctx); 287 - loff_t start = wreq->start; 288 - size_t len = wreq->len; 289 - int ret; 290 - 291 - if (!fscache_cookie_enabled(cookie)) { 292 - clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags); 293 - return; 294 - } 295 - 296 - _debug("write to cache"); 297 - ret = fscache_begin_write_operation(cres, cookie); 298 - if (ret < 0) 299 - return; 300 - 301 - ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len, 302 - i_size_read(wreq->inode), true); 303 - if (ret < 0) 304 - return; 305 - 306 - subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len, 307 - netfs_write_to_cache_op_worker); 308 - if (!subreq) 309 - return; 310 - 311 - netfs_write_to_cache_op(subreq); 312 - } 313 - 314 - /* 315 - * Begin the process of writing out a chunk of data. 316 - * 317 - * We are given a write request that holds a series of dirty regions and 318 - * (partially) covers a sequence of folios, all of which are present. The 319 - * pages must have been marked as writeback as appropriate. 
320 - * 321 - * We need to perform the following steps: 322 - * 323 - * (1) If encrypting, create an output buffer and encrypt each block of the 324 - * data into it, otherwise the output buffer will point to the original 325 - * folios. 326 - * 327 - * (2) If the data is to be cached, set up a write op for the entire output 328 - * buffer to the cache, if the cache wants to accept it. 329 - * 330 - * (3) If the data is to be uploaded (ie. not merely cached): 331 - * 332 - * (a) If the data is to be compressed, create a compression buffer and 333 - * compress the data into it. 334 - * 335 - * (b) For each destination we want to upload to, set up write ops to write 336 - * to that destination. We may need multiple writes if the data is not 337 - * contiguous or the span exceeds wsize for a server. 338 - */ 339 - int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait, 340 - enum netfs_write_trace what) 341 - { 342 - struct netfs_inode *ctx = netfs_inode(wreq->inode); 343 - 344 - _enter("R=%x %llx-%llx f=%lx", 345 - wreq->debug_id, wreq->start, wreq->start + wreq->len - 1, 346 - wreq->flags); 347 - 348 - trace_netfs_write(wreq, what); 349 - if (wreq->len == 0 || wreq->iter.count == 0) { 350 - pr_err("Zero-sized write [R=%x]\n", wreq->debug_id); 351 - return -EIO; 352 - } 353 - 354 - if (wreq->origin == NETFS_DIO_WRITE) 355 - inode_dio_begin(wreq->inode); 356 - 357 - wreq->io_iter = wreq->iter; 358 - 359 - /* ->outstanding > 0 carries a ref */ 360 - netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding); 361 - atomic_set(&wreq->nr_outstanding, 1); 362 - 363 - /* Start the encryption/compression going. We can do that in the 364 - * background whilst we generate a list of write ops that we want to 365 - * perform. 
366 - */ 367 - // TODO: Encrypt or compress the region as appropriate 368 - 369 - /* We need to write all of the region to the cache */ 370 - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) 371 - netfs_set_up_write_to_cache(wreq); 372 - 373 - /* However, we don't necessarily write all of the region to the server. 374 - * Caching of reads is being managed this way also. 375 - */ 376 - if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) 377 - ctx->ops->create_write_requests(wreq, wreq->start, wreq->len); 378 - 379 - if (atomic_dec_and_test(&wreq->nr_outstanding)) 380 - netfs_write_terminated(wreq, false); 381 - 382 - if (!may_wait) 383 - return -EIOCBQUEUED; 384 - 385 - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 386 - TASK_UNINTERRUPTIBLE); 387 - return wreq->error; 388 - } 389 - 390 - /* 391 - * Begin a write operation for writing through the pagecache. 392 - */ 393 - struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len) 394 - { 395 - struct netfs_io_request *wreq; 396 - struct file *file = iocb->ki_filp; 397 - 398 - wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len, 399 - NETFS_WRITETHROUGH); 400 - if (IS_ERR(wreq)) 401 - return wreq; 402 - 403 - trace_netfs_write(wreq, netfs_write_trace_writethrough); 404 - 405 - __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 406 - iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0); 407 - wreq->io_iter = wreq->iter; 408 - 409 - /* ->outstanding > 0 carries a ref */ 410 - netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding); 411 - atomic_set(&wreq->nr_outstanding, 1); 412 - return wreq; 413 - } 414 - 415 - static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final) 416 - { 417 - struct netfs_inode *ictx = netfs_inode(wreq->inode); 418 - unsigned long long start; 419 - size_t len; 420 - 421 - if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) 422 - return; 423 - 424 - start = wreq->start + 
wreq->submitted; 425 - len = wreq->iter.count - wreq->submitted; 426 - if (!final) { 427 - len /= wreq->wsize; /* Round to number of maximum packets */ 428 - len *= wreq->wsize; 429 - } 430 - 431 - ictx->ops->create_write_requests(wreq, start, len); 432 - wreq->submitted += len; 433 - } 434 - 435 - /* 436 - * Advance the state of the write operation used when writing through the 437 - * pagecache. Data has been copied into the pagecache that we need to append 438 - * to the request. If we've added more than wsize then we need to create a new 439 - * subrequest. 440 - */ 441 - int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end) 442 - { 443 - _enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u", 444 - wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end); 445 - 446 - wreq->iter.count += copied; 447 - wreq->io_iter.count += copied; 448 - if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize) 449 - netfs_submit_writethrough(wreq, false); 450 - 451 - return wreq->error; 452 - } 453 - 454 - /* 455 - * End a write operation used when writing through the pagecache. 456 - */ 457 - int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb) 458 - { 459 - int ret = -EIOCBQUEUED; 460 - 461 - _enter("ic=%zu sb=%zu ws=%u", 462 - wreq->iter.count, wreq->submitted, wreq->wsize); 463 - 464 - if (wreq->submitted < wreq->io_iter.count) 465 - netfs_submit_writethrough(wreq, true); 466 - 467 - if (atomic_dec_and_test(&wreq->nr_outstanding)) 468 - netfs_write_terminated(wreq, false); 469 - 470 - if (is_sync_kiocb(iocb)) { 471 - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, 472 - TASK_UNINTERRUPTIBLE); 473 - ret = wreq->error; 474 - } 475 - 476 - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 477 - return ret; 478 - }

+12 -5

fs/netfs/stats.c

··· 10 10 #include "internal.h" 11 11 12 12 atomic_t netfs_n_rh_dio_read; 13 - atomic_t netfs_n_rh_dio_write; 14 13 atomic_t netfs_n_rh_readahead; 15 - atomic_t netfs_n_rh_readpage; 14 + atomic_t netfs_n_rh_read_folio; 16 15 atomic_t netfs_n_rh_rreq; 17 16 atomic_t netfs_n_rh_sreq; 18 17 atomic_t netfs_n_rh_download; ··· 28 29 atomic_t netfs_n_rh_write_done; 29 30 atomic_t netfs_n_rh_write_failed; 30 31 atomic_t netfs_n_rh_write_zskip; 32 + atomic_t netfs_n_wh_buffered_write; 33 + atomic_t netfs_n_wh_writethrough; 34 + atomic_t netfs_n_wh_dio_write; 35 + atomic_t netfs_n_wh_writepages; 31 36 atomic_t netfs_n_wh_wstream_conflict; 32 37 atomic_t netfs_n_wh_upload; 33 38 atomic_t netfs_n_wh_upload_done; ··· 42 39 43 40 int netfs_stats_show(struct seq_file *m, void *v) 44 41 { 45 - seq_printf(m, "Netfs : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n", 42 + seq_printf(m, "Netfs : DR=%u RA=%u RF=%u WB=%u WBZ=%u\n", 46 43 atomic_read(&netfs_n_rh_dio_read), 47 - atomic_read(&netfs_n_rh_dio_write), 48 44 atomic_read(&netfs_n_rh_readahead), 49 - atomic_read(&netfs_n_rh_readpage), 45 + atomic_read(&netfs_n_rh_read_folio), 50 46 atomic_read(&netfs_n_rh_write_begin), 51 47 atomic_read(&netfs_n_rh_write_zskip)); 48 + seq_printf(m, "Netfs : BW=%u WT=%u DW=%u WP=%u\n", 49 + atomic_read(&netfs_n_wh_buffered_write), 50 + atomic_read(&netfs_n_wh_writethrough), 51 + atomic_read(&netfs_n_wh_dio_write), 52 + atomic_read(&netfs_n_wh_writepages)); 52 53 seq_printf(m, "Netfs : ZR=%u sh=%u sk=%u\n", 53 54 atomic_read(&netfs_n_rh_zero), 54 55 atomic_read(&netfs_n_rh_short_read),

+808

fs/netfs/write_collect.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Network filesystem write subrequest result collection, assessment 3 + * and retrying. 4 + * 5 + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 6 + * Written by David Howells (dhowells@redhat.com) 7 + */ 8 + 9 + #include <linux/export.h> 10 + #include <linux/fs.h> 11 + #include <linux/mm.h> 12 + #include <linux/pagemap.h> 13 + #include <linux/slab.h> 14 + #include "internal.h" 15 + 16 + /* Notes made in the collector */ 17 + #define HIT_PENDING 0x01 /* A front op was still pending */ 18 + #define SOME_EMPTY 0x02 /* One of more streams are empty */ 19 + #define ALL_EMPTY 0x04 /* All streams are empty */ 20 + #define MAYBE_DISCONTIG 0x08 /* A front op may be discontiguous (rounded to PAGE_SIZE) */ 21 + #define NEED_REASSESS 0x10 /* Need to loop round and reassess */ 22 + #define REASSESS_DISCONTIG 0x20 /* Reassess discontiguity if contiguity advances */ 23 + #define MADE_PROGRESS 0x40 /* Made progress cleaning up a stream or the folio set */ 24 + #define BUFFERED 0x80 /* The pagecache needs cleaning up */ 25 + #define NEED_RETRY 0x100 /* A front op requests retrying */ 26 + #define SAW_FAILURE 0x200 /* One stream or hit a permanent failure */ 27 + 28 + /* 29 + * Successful completion of write of a folio to the server and/or cache. Note 30 + * that we are not allowed to lock the folio here on pain of deadlocking with 31 + * truncate. 32 + */ 33 + int netfs_folio_written_back(struct folio *folio) 34 + { 35 + enum netfs_folio_trace why = netfs_folio_trace_clear; 36 + struct netfs_folio *finfo; 37 + struct netfs_group *group = NULL; 38 + int gcount = 0; 39 + 40 + if ((finfo = netfs_folio_info(folio))) { 41 + /* Streaming writes cannot be redirtied whilst under writeback, 42 + * so discard the streaming record. 
43 + */ 44 + folio_detach_private(folio); 45 + group = finfo->netfs_group; 46 + gcount++; 47 + kfree(finfo); 48 + why = netfs_folio_trace_clear_s; 49 + goto end_wb; 50 + } 51 + 52 + if ((group = netfs_folio_group(folio))) { 53 + if (group == NETFS_FOLIO_COPY_TO_CACHE) { 54 + why = netfs_folio_trace_clear_cc; 55 + folio_detach_private(folio); 56 + goto end_wb; 57 + } 58 + 59 + /* Need to detach the group pointer if the page didn't get 60 + * redirtied. If it has been redirtied, then it must be within 61 + * the same group. 62 + */ 63 + why = netfs_folio_trace_redirtied; 64 + if (!folio_test_dirty(folio)) { 65 + folio_detach_private(folio); 66 + gcount++; 67 + why = netfs_folio_trace_clear_g; 68 + } 69 + } 70 + 71 + end_wb: 72 + trace_netfs_folio(folio, why); 73 + folio_end_writeback(folio); 74 + return gcount; 75 + } 76 + 77 + /* 78 + * Get hold of a folio we have under writeback. We don't want to get the 79 + * refcount on it. 80 + */ 81 + static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos) 82 + { 83 + XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE); 84 + struct folio *folio; 85 + 86 + rcu_read_lock(); 87 + 88 + for (;;) { 89 + xas_reset(&xas); 90 + folio = xas_load(&xas); 91 + if (xas_retry(&xas, folio)) 92 + continue; 93 + 94 + if (!folio || xa_is_value(folio)) 95 + kdebug("R=%08x: folio %lx (%llx) not present", 96 + wreq->debug_id, xas.xa_index, pos / PAGE_SIZE); 97 + BUG_ON(!folio || xa_is_value(folio)); 98 + 99 + if (folio == xas_reload(&xas)) 100 + break; 101 + } 102 + 103 + rcu_read_unlock(); 104 + 105 + if (WARN_ONCE(!folio_test_writeback(folio), 106 + "R=%08x: folio %lx is not under writeback\n", 107 + wreq->debug_id, folio->index)) { 108 + trace_netfs_folio(folio, netfs_folio_trace_not_under_wback); 109 + } 110 + return folio; 111 + } 112 + 113 + /* 114 + * Unlock any folios we've finished with. 
115 + */ 116 + static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq, 117 + unsigned long long collected_to, 118 + unsigned int *notes) 119 + { 120 + for (;;) { 121 + struct folio *folio; 122 + struct netfs_folio *finfo; 123 + unsigned long long fpos, fend; 124 + size_t fsize, flen; 125 + 126 + folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to); 127 + 128 + fpos = folio_pos(folio); 129 + fsize = folio_size(folio); 130 + finfo = netfs_folio_info(folio); 131 + flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize; 132 + 133 + fend = min_t(unsigned long long, fpos + flen, wreq->i_size); 134 + 135 + trace_netfs_collect_folio(wreq, folio, fend, collected_to); 136 + 137 + if (fpos + fsize > wreq->contiguity) { 138 + trace_netfs_collect_contig(wreq, fpos + fsize, 139 + netfs_contig_trace_unlock); 140 + wreq->contiguity = fpos + fsize; 141 + } 142 + 143 + /* Unlock any folio we've transferred all of. */ 144 + if (collected_to < fend) 145 + break; 146 + 147 + wreq->nr_group_rel += netfs_folio_written_back(folio); 148 + wreq->cleaned_to = fpos + fsize; 149 + *notes |= MADE_PROGRESS; 150 + 151 + if (fpos + fsize >= collected_to) 152 + break; 153 + } 154 + } 155 + 156 + /* 157 + * Perform retries on the streams that need it. 158 + */ 159 + static void netfs_retry_write_stream(struct netfs_io_request *wreq, 160 + struct netfs_io_stream *stream) 161 + { 162 + struct list_head *next; 163 + 164 + _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); 165 + 166 + if (list_empty(&stream->subrequests)) 167 + return; 168 + 169 + if (stream->source == NETFS_UPLOAD_TO_SERVER && 170 + wreq->netfs_ops->retry_request) 171 + wreq->netfs_ops->retry_request(wreq, stream); 172 + 173 + if (unlikely(stream->failed)) 174 + return; 175 + 176 + /* If there's no renegotiation to do, just resend each failed subreq. 
*/ 177 + if (!stream->prepare_write) { 178 + struct netfs_io_subrequest *subreq; 179 + 180 + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { 181 + if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 182 + break; 183 + if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { 184 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 185 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 186 + netfs_reissue_write(stream, subreq); 187 + } 188 + } 189 + return; 190 + } 191 + 192 + next = stream->subrequests.next; 193 + 194 + do { 195 + struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp; 196 + unsigned long long start, len; 197 + size_t part; 198 + bool boundary = false; 199 + 200 + /* Go through the stream and find the next span of contiguous 201 + * data that we then rejig (cifs, for example, needs the wsize 202 + * renegotiating) and reissue. 203 + */ 204 + from = list_entry(next, struct netfs_io_subrequest, rreq_link); 205 + to = from; 206 + start = from->start + from->transferred; 207 + len = from->len - from->transferred; 208 + 209 + if (test_bit(NETFS_SREQ_FAILED, &from->flags) || 210 + !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) 211 + return; 212 + 213 + list_for_each_continue(next, &stream->subrequests) { 214 + subreq = list_entry(next, struct netfs_io_subrequest, rreq_link); 215 + if (subreq->start + subreq->transferred != start + len || 216 + test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) || 217 + !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) 218 + break; 219 + to = subreq; 220 + len += to->len; 221 + } 222 + 223 + /* Work through the sublist. 
*/ 224 + subreq = from; 225 + list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { 226 + if (!len) 227 + break; 228 + /* Renegotiate max_len (wsize) */ 229 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 230 + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 231 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 232 + stream->prepare_write(subreq); 233 + 234 + part = min(len, subreq->max_len); 235 + subreq->len = part; 236 + subreq->start = start; 237 + subreq->transferred = 0; 238 + len -= part; 239 + start += part; 240 + if (len && subreq == to && 241 + __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags)) 242 + boundary = true; 243 + 244 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 245 + netfs_reissue_write(stream, subreq); 246 + if (subreq == to) 247 + break; 248 + } 249 + 250 + /* If we managed to use fewer subreqs, we can discard the 251 + * excess; if we used the same number, then we're done. 252 + */ 253 + if (!len) { 254 + if (subreq == to) 255 + continue; 256 + list_for_each_entry_safe_from(subreq, tmp, 257 + &stream->subrequests, rreq_link) { 258 + trace_netfs_sreq(subreq, netfs_sreq_trace_discard); 259 + list_del(&subreq->rreq_link); 260 + netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); 261 + if (subreq == to) 262 + break; 263 + } 264 + continue; 265 + } 266 + 267 + /* We ran out of subrequests, so we need to allocate some more 268 + * and insert them after. 
269 + */ 270 + do { 271 + subreq = netfs_alloc_subrequest(wreq); 272 + subreq->source = to->source; 273 + subreq->start = start; 274 + subreq->max_len = len; 275 + subreq->max_nr_segs = INT_MAX; 276 + subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); 277 + subreq->stream_nr = to->stream_nr; 278 + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); 279 + 280 + trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 281 + refcount_read(&subreq->ref), 282 + netfs_sreq_trace_new); 283 + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); 284 + 285 + list_add(&subreq->rreq_link, &to->rreq_link); 286 + to = list_next_entry(to, rreq_link); 287 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 288 + 289 + switch (stream->source) { 290 + case NETFS_UPLOAD_TO_SERVER: 291 + netfs_stat(&netfs_n_wh_upload); 292 + subreq->max_len = min(len, wreq->wsize); 293 + break; 294 + case NETFS_WRITE_TO_CACHE: 295 + netfs_stat(&netfs_n_wh_write); 296 + break; 297 + default: 298 + WARN_ON_ONCE(1); 299 + } 300 + 301 + stream->prepare_write(subreq); 302 + 303 + part = min(len, subreq->max_len); 304 + subreq->len = subreq->transferred + part; 305 + len -= part; 306 + start += part; 307 + if (!len && boundary) { 308 + __set_bit(NETFS_SREQ_BOUNDARY, &to->flags); 309 + boundary = false; 310 + } 311 + 312 + netfs_reissue_write(stream, subreq); 313 + if (!len) 314 + break; 315 + 316 + } while (len); 317 + 318 + } while (!list_is_head(next, &stream->subrequests)); 319 + } 320 + 321 + /* 322 + * Perform retries on the streams that need it. If we're doing content 323 + * encryption and the server copy changed due to a third-party write, we may 324 + * need to do an RMW cycle and also rewrite the data to the cache. 
325 + */ 326 + static void netfs_retry_writes(struct netfs_io_request *wreq) 327 + { 328 + struct netfs_io_subrequest *subreq; 329 + struct netfs_io_stream *stream; 330 + int s; 331 + 332 + /* Wait for all outstanding I/O to quiesce before performing retries as 333 + * we may need to renegotiate the I/O sizes. 334 + */ 335 + for (s = 0; s < NR_IO_STREAMS; s++) { 336 + stream = &wreq->io_streams[s]; 337 + if (!stream->active) 338 + continue; 339 + 340 + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { 341 + wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, 342 + TASK_UNINTERRUPTIBLE); 343 + } 344 + } 345 + 346 + // TODO: Enc: Fetch changed partial pages 347 + // TODO: Enc: Reencrypt content if needed. 348 + // TODO: Enc: Wind back transferred point. 349 + // TODO: Enc: Mark cache pages for retry. 350 + 351 + for (s = 0; s < NR_IO_STREAMS; s++) { 352 + stream = &wreq->io_streams[s]; 353 + if (stream->need_retry) { 354 + stream->need_retry = false; 355 + netfs_retry_write_stream(wreq, stream); 356 + } 357 + } 358 + } 359 + 360 + /* 361 + * Collect and assess the results of various write subrequests. We may need to 362 + * retry some of the results - or even do an RMW cycle for content crypto. 363 + * 364 + * Note that we have a number of parallel, overlapping lists of subrequests, 365 + * one to the server and one to the local cache for example, which may not be 366 + * the same size or starting position and may not even correspond in boundary 367 + * alignment. 
368 + */ 369 + static void netfs_collect_write_results(struct netfs_io_request *wreq) 370 + { 371 + struct netfs_io_subrequest *front, *remove; 372 + struct netfs_io_stream *stream; 373 + unsigned long long collected_to; 374 + unsigned int notes; 375 + int s; 376 + 377 + _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 378 + trace_netfs_collect(wreq); 379 + trace_netfs_rreq(wreq, netfs_rreq_trace_collect); 380 + 381 + reassess_streams: 382 + smp_rmb(); 383 + collected_to = ULLONG_MAX; 384 + if (wreq->origin == NETFS_WRITEBACK) 385 + notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG; 386 + else if (wreq->origin == NETFS_WRITETHROUGH) 387 + notes = ALL_EMPTY | BUFFERED; 388 + else 389 + notes = ALL_EMPTY; 390 + 391 + /* Remove completed subrequests from the front of the streams and 392 + * advance the completion point on each stream. We stop when we hit 393 + * something that's in progress. The issuer thread may be adding stuff 394 + * to the tail whilst we're doing this. 395 + * 396 + * We must not, however, merge in discontiguities that span whole 397 + * folios that aren't under writeback. This is made more complicated 398 + * by the folios in the gap being of unpredictable sizes - if they even 399 + * exist - but we don't want to look them up. 400 + */ 401 + for (s = 0; s < NR_IO_STREAMS; s++) { 402 + loff_t rstart, rend; 403 + 404 + stream = &wreq->io_streams[s]; 405 + /* Read active flag before list pointers */ 406 + if (!smp_load_acquire(&stream->active)) 407 + continue; 408 + 409 + front = stream->front; 410 + while (front) { 411 + trace_netfs_collect_sreq(wreq, front); 412 + //_debug("sreq [%x] %llx %zx/%zx", 413 + // front->debug_index, front->start, front->transferred, front->len); 414 + 415 + /* Stall if there may be a discontinuity. 
*/ 416 + rstart = round_down(front->start, PAGE_SIZE); 417 + if (rstart > wreq->contiguity) { 418 + if (wreq->contiguity > stream->collected_to) { 419 + trace_netfs_collect_gap(wreq, stream, 420 + wreq->contiguity, 'D'); 421 + stream->collected_to = wreq->contiguity; 422 + } 423 + notes |= REASSESS_DISCONTIG; 424 + break; 425 + } 426 + rend = round_up(front->start + front->len, PAGE_SIZE); 427 + if (rend > wreq->contiguity) { 428 + trace_netfs_collect_contig(wreq, rend, 429 + netfs_contig_trace_collect); 430 + wreq->contiguity = rend; 431 + if (notes & REASSESS_DISCONTIG) 432 + notes |= NEED_REASSESS; 433 + } 434 + notes &= ~MAYBE_DISCONTIG; 435 + 436 + /* Stall if the front is still undergoing I/O. */ 437 + if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { 438 + notes |= HIT_PENDING; 439 + break; 440 + } 441 + smp_rmb(); /* Read counters after I-P flag. */ 442 + 443 + if (stream->failed) { 444 + stream->collected_to = front->start + front->len; 445 + notes |= MADE_PROGRESS | SAW_FAILURE; 446 + goto cancel; 447 + } 448 + if (front->start + front->transferred > stream->collected_to) { 449 + stream->collected_to = front->start + front->transferred; 450 + stream->transferred = stream->collected_to - wreq->start; 451 + notes |= MADE_PROGRESS; 452 + } 453 + if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { 454 + stream->failed = true; 455 + stream->error = front->error; 456 + if (stream->source == NETFS_UPLOAD_TO_SERVER) 457 + mapping_set_error(wreq->mapping, front->error); 458 + notes |= NEED_REASSESS | SAW_FAILURE; 459 + break; 460 + } 461 + if (front->transferred < front->len) { 462 + stream->need_retry = true; 463 + notes |= NEED_RETRY | MADE_PROGRESS; 464 + break; 465 + } 466 + 467 + cancel: 468 + /* Remove if completely consumed. 
*/ 469 + spin_lock(&wreq->lock); 470 + 471 + remove = front; 472 + list_del_init(&front->rreq_link); 473 + front = list_first_entry_or_null(&stream->subrequests, 474 + struct netfs_io_subrequest, rreq_link); 475 + stream->front = front; 476 + if (!front) { 477 + unsigned long long jump_to = atomic64_read(&wreq->issued_to); 478 + 479 + if (stream->collected_to < jump_to) { 480 + trace_netfs_collect_gap(wreq, stream, jump_to, 'A'); 481 + stream->collected_to = jump_to; 482 + } 483 + } 484 + 485 + spin_unlock(&wreq->lock); 486 + netfs_put_subrequest(remove, false, 487 + notes & SAW_FAILURE ? 488 + netfs_sreq_trace_put_cancel : 489 + netfs_sreq_trace_put_done); 490 + } 491 + 492 + if (front) 493 + notes &= ~ALL_EMPTY; 494 + else 495 + notes |= SOME_EMPTY; 496 + 497 + if (stream->collected_to < collected_to) 498 + collected_to = stream->collected_to; 499 + } 500 + 501 + if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to) 502 + wreq->collected_to = collected_to; 503 + 504 + /* If we have an empty stream, we need to jump it forward over any gap 505 + * otherwise the collection point will never advance. 506 + * 507 + * Note that the issuer always adds to the stream with the lowest 508 + * so-far submitted start, so if we see two consecutive subreqs in one 509 + * stream with nothing between then in another stream, then the second 510 + * stream has a gap that can be jumped. 
511 + */ 512 + if (notes & SOME_EMPTY) { 513 + unsigned long long jump_to = wreq->start + wreq->len; 514 + 515 + for (s = 0; s < NR_IO_STREAMS; s++) { 516 + stream = &wreq->io_streams[s]; 517 + if (stream->active && 518 + stream->front && 519 + stream->front->start < jump_to) 520 + jump_to = stream->front->start; 521 + } 522 + 523 + for (s = 0; s < NR_IO_STREAMS; s++) { 524 + stream = &wreq->io_streams[s]; 525 + if (stream->active && 526 + !stream->front && 527 + stream->collected_to < jump_to) { 528 + trace_netfs_collect_gap(wreq, stream, jump_to, 'B'); 529 + stream->collected_to = jump_to; 530 + } 531 + } 532 + } 533 + 534 + for (s = 0; s < NR_IO_STREAMS; s++) { 535 + stream = &wreq->io_streams[s]; 536 + if (stream->active) 537 + trace_netfs_collect_stream(wreq, stream); 538 + } 539 + 540 + trace_netfs_collect_state(wreq, wreq->collected_to, notes); 541 + 542 + /* Unlock any folios that we have now finished with. */ 543 + if (notes & BUFFERED) { 544 + unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity); 545 + 546 + if (wreq->cleaned_to < clean_to) 547 + netfs_writeback_unlock_folios(wreq, clean_to, &notes); 548 + } else { 549 + wreq->cleaned_to = wreq->collected_to; 550 + } 551 + 552 + // TODO: Discard encryption buffers 553 + 554 + /* If all streams are discontiguous with the last folio we cleared, we 555 + * may need to skip a set of folios. 
556 + */ 557 + if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) { 558 + unsigned long long jump_to = ULLONG_MAX; 559 + 560 + for (s = 0; s < NR_IO_STREAMS; s++) { 561 + stream = &wreq->io_streams[s]; 562 + if (stream->active && stream->front && 563 + stream->front->start < jump_to) 564 + jump_to = stream->front->start; 565 + } 566 + 567 + trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump); 568 + wreq->contiguity = jump_to; 569 + wreq->cleaned_to = jump_to; 570 + wreq->collected_to = jump_to; 571 + for (s = 0; s < NR_IO_STREAMS; s++) { 572 + stream = &wreq->io_streams[s]; 573 + if (stream->collected_to < jump_to) 574 + stream->collected_to = jump_to; 575 + } 576 + //cond_resched(); 577 + notes |= MADE_PROGRESS; 578 + goto reassess_streams; 579 + } 580 + 581 + if (notes & NEED_RETRY) 582 + goto need_retry; 583 + if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { 584 + trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); 585 + clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); 586 + wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE); 587 + } 588 + 589 + if (notes & NEED_REASSESS) { 590 + //cond_resched(); 591 + goto reassess_streams; 592 + } 593 + if (notes & MADE_PROGRESS) { 594 + //cond_resched(); 595 + goto reassess_streams; 596 + } 597 + 598 + out: 599 + netfs_put_group_many(wreq->group, wreq->nr_group_rel); 600 + wreq->nr_group_rel = 0; 601 + _leave(" = %x", notes); 602 + return; 603 + 604 + need_retry: 605 + /* Okay... We're going to have to retry one or both streams. Note 606 + * that any partially completed op will have had any wholly transferred 607 + * folios removed from it. 608 + */ 609 + _debug("retry"); 610 + netfs_retry_writes(wreq); 611 + goto out; 612 + } 613 + 614 + /* 615 + * Perform the collection of subrequests, folios and encryption buffers. 
616 + */ 617 + void netfs_write_collection_worker(struct work_struct *work) 618 + { 619 + struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); 620 + struct netfs_inode *ictx = netfs_inode(wreq->inode); 621 + size_t transferred; 622 + int s; 623 + 624 + _enter("R=%x", wreq->debug_id); 625 + 626 + netfs_see_request(wreq, netfs_rreq_trace_see_work); 627 + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { 628 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work); 629 + return; 630 + } 631 + 632 + netfs_collect_write_results(wreq); 633 + 634 + /* We're done when the app thread has finished posting subreqs and all 635 + * the queues in all the streams are empty. 636 + */ 637 + if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) { 638 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work); 639 + return; 640 + } 641 + smp_rmb(); /* Read ALL_QUEUED before lists. */ 642 + 643 + transferred = LONG_MAX; 644 + for (s = 0; s < NR_IO_STREAMS; s++) { 645 + struct netfs_io_stream *stream = &wreq->io_streams[s]; 646 + if (!stream->active) 647 + continue; 648 + if (!list_empty(&stream->subrequests)) { 649 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work); 650 + return; 651 + } 652 + if (stream->transferred < transferred) 653 + transferred = stream->transferred; 654 + } 655 + 656 + /* Okay, declare that all I/O is complete. */ 657 + wreq->transferred = transferred; 658 + trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); 659 + 660 + if (wreq->io_streams[1].active && 661 + wreq->io_streams[1].failed) { 662 + /* Cache write failure doesn't prevent writeback completion 663 + * unless we're in disconnected mode. 664 + */ 665 + ictx->ops->invalidate_cache(wreq); 666 + } 667 + 668 + if (wreq->cleanup) 669 + wreq->cleanup(wreq); 670 + 671 + if (wreq->origin == NETFS_DIO_WRITE && 672 + wreq->mapping->nrpages) { 673 + /* mmap may have got underfoot and we may now have folios 674 + * locally covering the region we just wrote. 
Attempt to 675 + * discard the folios, but leave in place any modified locally. 676 + * ->write_iter() is prevented from interfering by the DIO 677 + * counter. 678 + */ 679 + pgoff_t first = wreq->start >> PAGE_SHIFT; 680 + pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; 681 + invalidate_inode_pages2_range(wreq->mapping, first, last); 682 + } 683 + 684 + if (wreq->origin == NETFS_DIO_WRITE) 685 + inode_dio_end(wreq->inode); 686 + 687 + _debug("finished"); 688 + trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); 689 + clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); 690 + wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); 691 + 692 + if (wreq->iocb) { 693 + wreq->iocb->ki_pos += wreq->transferred; 694 + if (wreq->iocb->ki_complete) 695 + wreq->iocb->ki_complete( 696 + wreq->iocb, wreq->error ? wreq->error : wreq->transferred); 697 + wreq->iocb = VFS_PTR_POISON; 698 + } 699 + 700 + netfs_clear_subrequests(wreq, false); 701 + netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete); 702 + } 703 + 704 + /* 705 + * Wake the collection work item. 706 + */ 707 + void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) 708 + { 709 + if (!work_pending(&wreq->work)) { 710 + netfs_get_request(wreq, netfs_rreq_trace_get_work); 711 + if (!queue_work(system_unbound_wq, &wreq->work)) 712 + netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq); 713 + } 714 + } 715 + 716 + /** 717 + * netfs_write_subrequest_terminated - Note the termination of a write operation. 718 + * @_op: The I/O request that has terminated. 719 + * @transferred_or_error: The amount of data transferred or an error code. 720 + * @was_async: The termination was asynchronous 721 + * 722 + * This tells the library that a contributory write I/O operation has 723 + * terminated, one way or another, and that it should collect the results. 
724 + * 725 + * The caller indicates in @transferred_or_error the outcome of the operation, 726 + * supplying a positive value to indicate the number of bytes transferred or a 727 + * negative error code. The library will look after reissuing I/O operations 728 + * as appropriate and writing downloaded data to the cache. 729 + * 730 + * If @was_async is true, the caller might be running in softirq or interrupt 731 + * context and we can't sleep. 732 + * 733 + * When this is called, ownership of the subrequest is transferred back to the 734 + * library, along with a ref. 735 + * 736 + * Note that %_op is a void* so that the function can be passed to 737 + * kiocb::term_func without the need for a casting wrapper. 738 + */ 739 + void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 740 + bool was_async) 741 + { 742 + struct netfs_io_subrequest *subreq = _op; 743 + struct netfs_io_request *wreq = subreq->rreq; 744 + struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; 745 + 746 + _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 747 + 748 + switch (subreq->source) { 749 + case NETFS_UPLOAD_TO_SERVER: 750 + netfs_stat(&netfs_n_wh_upload_done); 751 + break; 752 + case NETFS_WRITE_TO_CACHE: 753 + netfs_stat(&netfs_n_wh_write_done); 754 + break; 755 + case NETFS_INVALID_WRITE: 756 + break; 757 + default: 758 + BUG(); 759 + } 760 + 761 + if (IS_ERR_VALUE(transferred_or_error)) { 762 + subreq->error = transferred_or_error; 763 + if (subreq->error == -EAGAIN) 764 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 765 + else 766 + set_bit(NETFS_SREQ_FAILED, &subreq->flags); 767 + trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write); 768 + 769 + switch (subreq->source) { 770 + case NETFS_WRITE_TO_CACHE: 771 + netfs_stat(&netfs_n_wh_write_failed); 772 + break; 773 + case NETFS_UPLOAD_TO_SERVER: 774 + netfs_stat(&netfs_n_wh_upload_failed); 775 + break; 776 + default: 777 + break; 778 + 
} 779 + trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause); 780 + set_bit(NETFS_RREQ_PAUSE, &wreq->flags); 781 + } else { 782 + if (WARN(transferred_or_error > subreq->len - subreq->transferred, 783 + "Subreq excess write: R=%x[%x] %zd > %zu - %zu", 784 + wreq->debug_id, subreq->debug_index, 785 + transferred_or_error, subreq->len, subreq->transferred)) 786 + transferred_or_error = subreq->len - subreq->transferred; 787 + 788 + subreq->error = 0; 789 + subreq->transferred += transferred_or_error; 790 + 791 + if (subreq->transferred < subreq->len) 792 + set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 793 + } 794 + 795 + trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 796 + 797 + clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 798 + wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS); 799 + 800 + /* If we are at the head of the queue, wake up the collector, 801 + * transferring a ref to it if we were the ones to do so. 802 + */ 803 + if (list_is_first(&subreq->rreq_link, &stream->subrequests)) 804 + netfs_wake_write_collector(wreq, was_async); 805 + 806 + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); 807 + } 808 + EXPORT_SYMBOL(netfs_write_subrequest_terminated);

+684

fs/netfs/write_issue.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Network filesystem high-level (buffered) writeback. 3 + * 4 + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 5 + * Written by David Howells (dhowells@redhat.com) 6 + * 7 + * 8 + * To support network filesystems with local caching, we manage a situation 9 + * that can be envisioned like the following: 10 + * 11 + * +---+---+-----+-----+---+----------+ 12 + * Folios: | | | | | | | 13 + * +---+---+-----+-----+---+----------+ 14 + * 15 + * +------+------+ +----+----+ 16 + * Upload: | | |.....| | | 17 + * (Stream 0) +------+------+ +----+----+ 18 + * 19 + * +------+------+------+------+------+ 20 + * Cache: | | | | | | 21 + * (Stream 1) +------+------+------+------+------+ 22 + * 23 + * Where we have a sequence of folios of varying sizes that we need to overlay 24 + * with multiple parallel streams of I/O requests, where the I/O requests in a 25 + * stream may also be of various sizes (in cifs, for example, the sizes are 26 + * negotiated with the server; in something like ceph, they may represent the 27 + * sizes of storage objects). 28 + * 29 + * The sequence in each stream may contain gaps and noncontiguous subrequests 30 + * may be glued together into single vectored write RPCs. 31 + */ 32 + 33 + #include <linux/export.h> 34 + #include <linux/fs.h> 35 + #include <linux/mm.h> 36 + #include <linux/pagemap.h> 37 + #include "internal.h" 38 + 39 + /* 40 + * Kill all dirty folios in the event of an unrecoverable error, starting with 41 + * a locked folio we've already obtained from writeback_iter(). 
42 + */ 43 + static void netfs_kill_dirty_pages(struct address_space *mapping, 44 + struct writeback_control *wbc, 45 + struct folio *folio) 46 + { 47 + int error = 0; 48 + 49 + do { 50 + enum netfs_folio_trace why = netfs_folio_trace_kill; 51 + struct netfs_group *group = NULL; 52 + struct netfs_folio *finfo = NULL; 53 + void *priv; 54 + /* Detach whatever is attached to the folio so that it can be released at the bottom of the loop; the branches below only classify it to pick the trace reason. */ 55 + priv = folio_detach_private(folio); 56 + if (priv) { 57 + finfo = __netfs_folio_info(priv); 58 + if (finfo) { 59 + /* Kill folio from streaming write. */ 60 + group = finfo->netfs_group; 61 + why = netfs_folio_trace_kill_s; 62 + } else { 63 + group = priv; 64 + if (group == NETFS_FOLIO_COPY_TO_CACHE) { 65 + /* Kill copy-to-cache folio */ 66 + why = netfs_folio_trace_kill_cc; 67 + group = NULL; 68 + } else { 69 + /* Kill folio with group */ 70 + why = netfs_folio_trace_kill_g; 71 + } 72 + } 73 + } 74 + 75 + trace_netfs_folio(folio, why); 76 + /* No write is issued: the folio is put into the writeback state, unlocked and taken straight back out of it. */ 77 + folio_start_writeback(folio); 78 + folio_unlock(folio); 79 + folio_end_writeback(folio); 80 + /* group and finfo are still NULL here if nothing was attached (or, for group, if it was the copy-to-cache marker). */ 81 + netfs_put_group(group); 82 + kfree(finfo); 83 + 84 + } while ((folio = writeback_iter(mapping, wbc, folio, &error))); 85 + } 86 + 87 + /* 88 + * Create a write request and set it up appropriately for the origin type. 
89 + */ 90 + struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, 91 + struct file *file, 92 + loff_t start, 93 + enum netfs_io_origin origin) 94 + { 95 + struct netfs_io_request *wreq; 96 + struct netfs_inode *ictx; 97 + 98 + wreq = netfs_alloc_request(mapping, file, start, 0, origin); /* An allocation failure is handed back to the caller as the same error pointer. */ 99 + if (IS_ERR(wreq)) 100 + return wreq; 101 + 102 + _enter("R=%x", wreq->debug_id); 103 + 104 + ictx = netfs_inode(wreq->inode); 105 + if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) 106 + fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); 107 + 108 + wreq->contiguity = wreq->start; 109 + wreq->cleaned_to = wreq->start; 110 + INIT_WORK(&wreq->work, netfs_write_collection_worker); 111 + /* Stream 0 uploads to the server through the filesystem's own ops. Its avail flag is not set in this function. */ 112 + wreq->io_streams[0].stream_nr = 0; 113 + wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER; 114 + wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; 115 + wreq->io_streams[0].issue_write = ictx->ops->issue_write; 116 + wreq->io_streams[0].collected_to = start; 117 + wreq->io_streams[0].transferred = LONG_MAX; 118 + /* Stream 1 writes to the cache; it is marked available and given its ops only if the cache resources are valid. */ 119 + wreq->io_streams[1].stream_nr = 1; 120 + wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; 121 + wreq->io_streams[1].collected_to = start; 122 + wreq->io_streams[1].transferred = LONG_MAX; 123 + if (fscache_resources_valid(&wreq->cache_resources)) { 124 + wreq->io_streams[1].avail = true; 125 + wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq; 126 + wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write; 127 + } 128 + 129 + return wreq; 130 + } 131 + 132 + /** 133 + * netfs_prepare_write_failed - Note write preparation failed 134 + * @subreq: The subrequest to mark 135 + * 136 + * Mark a subrequest to note that preparation for write failed. 
137 + */ 138 + void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq) 139 + { 140 + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 141 + trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed); 142 + } 143 + EXPORT_SYMBOL(netfs_prepare_write_failed); 144 + 145 + /* 146 + * Prepare a write subrequest. We need to allocate a new subrequest 147 + * if we don't have one. 148 + */ 149 + static void netfs_prepare_write(struct netfs_io_request *wreq, 150 + struct netfs_io_stream *stream, 151 + loff_t start) 152 + { 153 + struct netfs_io_subrequest *subreq; 154 + 155 + subreq = netfs_alloc_subrequest(wreq); /* NOTE(review): the result is dereferenced unchecked; presumably netfs_alloc_subrequest() cannot return NULL - confirm. */ 156 + subreq->source = stream->source; 157 + subreq->start = start; 158 + subreq->max_len = ULONG_MAX; 159 + subreq->max_nr_segs = INT_MAX; 160 + subreq->stream_nr = stream->stream_nr; 161 + 162 + _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); 163 + 164 + trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 165 + refcount_read(&subreq->ref), 166 + netfs_sreq_trace_new); 167 + 168 + trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); 169 + /* Uploads are capped at the request's wsize; cache writes keep the ULONG_MAX default set above. */ 170 + switch (stream->source) { 171 + case NETFS_UPLOAD_TO_SERVER: 172 + netfs_stat(&netfs_n_wh_upload); 173 + subreq->max_len = wreq->wsize; 174 + break; 175 + case NETFS_WRITE_TO_CACHE: 176 + netfs_stat(&netfs_n_wh_write); 177 + break; 178 + default: 179 + WARN_ON_ONCE(1); 180 + break; 181 + } 182 + 183 + if (stream->prepare_write) 184 + stream->prepare_write(subreq); 185 + 186 + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 187 + 188 + /* We add to the end of the list whilst the collector may be walking 189 + * the list. The collector only goes nextwards and uses the lock to 190 + * remove entries off of the front. 
191 + */ 192 + spin_lock(&wreq->lock); 193 + list_add_tail(&subreq->rreq_link, &stream->subrequests); 194 + if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { 195 + stream->front = subreq; 196 + if (!stream->active) { 197 + stream->collected_to = stream->front->start; 198 + /* Write list pointers before active flag */ 199 + smp_store_release(&stream->active, true); 200 + } 201 + } 202 + 203 + spin_unlock(&wreq->lock); 204 + 205 + stream->construct = subreq; 206 + } 207 + 208 + /* 209 + * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O 210 + * operation. The operation may be asynchronous and should call 211 + * netfs_write_subrequest_terminated() when complete. 212 + */ 213 + static void netfs_do_issue_write(struct netfs_io_stream *stream, 214 + struct netfs_io_subrequest *subreq) 215 + { 216 + struct netfs_io_request *wreq = subreq->rreq; 217 + 218 + _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); 219 + /* A subrequest already marked failed is terminated with its recorded error instead of being issued. */ 220 + if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 221 + return netfs_write_subrequest_terminated(subreq, subreq->error, false); 222 + 223 + // TODO: Use encrypted buffer 224 + if (test_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags)) { 225 + subreq->io_iter = wreq->io_iter; 226 + iov_iter_advance(&subreq->io_iter, 227 + subreq->start + subreq->transferred - wreq->start); 228 + iov_iter_truncate(&subreq->io_iter, 229 + subreq->len - subreq->transferred); 230 + } else { 231 + iov_iter_xarray(&subreq->io_iter, ITER_SOURCE, &wreq->mapping->i_pages, 232 + subreq->start + subreq->transferred, 233 + subreq->len - subreq->transferred); 234 + } 235 + 236 + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 237 + stream->issue_write(subreq); 238 + } 239 + /* Mark a subrequest in-progress again and pass it back through netfs_do_issue_write(). */ 240 + void netfs_reissue_write(struct netfs_io_stream *stream, 241 + struct netfs_io_subrequest *subreq) 242 + { 243 + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 244 + netfs_do_issue_write(stream, subreq); 245 + } 246 + 247 + static void 
netfs_issue_write(struct netfs_io_request *wreq, 248 + struct netfs_io_stream *stream) 249 + { 250 + struct netfs_io_subrequest *subreq = stream->construct; 251 + 252 + if (!subreq) 253 + return; 254 + stream->construct = NULL; 255 + /* Extend the request's submitted and len marks if this subrequest ends beyond them. */ 256 + if (subreq->start + subreq->len > wreq->start + wreq->submitted) 257 + wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start; 258 + netfs_do_issue_write(stream, subreq); 259 + } 260 + 261 + /* 262 + * Add data to the write subrequest, dispatching each as we fill it up or if it 263 + * is discontiguous with the previous. We only fill one part at a time so that 264 + * we can avoid overrunning the credits obtained (cifs) and try to parallelise 265 + * content-crypto preparation with network writes. 266 + */ 267 + int netfs_advance_write(struct netfs_io_request *wreq, 268 + struct netfs_io_stream *stream, 269 + loff_t start, size_t len, bool to_eof) 270 + { 271 + struct netfs_io_subrequest *subreq = stream->construct; 272 + size_t part; 273 + /* An unavailable stream reports the whole span as consumed without creating a subrequest. NOTE(review): len and part are size_t but the return type is int. */ 274 + if (!stream->avail) { 275 + _leave("no write"); 276 + return len; 277 + } 278 + 279 + _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); 280 + 281 + if (subreq && start != subreq->start + subreq->len) { 282 + netfs_issue_write(wreq, stream); 283 + subreq = NULL; 284 + } 285 + 286 + if (!stream->construct) 287 + netfs_prepare_write(wreq, stream, start); 288 + subreq = stream->construct; 289 + 290 + part = min(subreq->max_len - subreq->len, len); 291 + _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); 292 + subreq->len += part; 293 + subreq->nr_segs++; 294 + 295 + if (subreq->len >= subreq->max_len || 296 + subreq->nr_segs >= subreq->max_nr_segs || 297 + to_eof) { 298 + netfs_issue_write(wreq, stream); 299 + subreq = NULL; 300 + } 301 + 302 + return part; 303 + } 304 + 305 + /* 306 + * Write some of a pending folio data back to the server. 
307 + */ 308 + static int netfs_write_folio(struct netfs_io_request *wreq, 309 + struct writeback_control *wbc, 310 + struct folio *folio) 311 + { 312 + struct netfs_io_stream *upload = &wreq->io_streams[0]; 313 + struct netfs_io_stream *cache = &wreq->io_streams[1]; 314 + struct netfs_io_stream *stream; 315 + struct netfs_group *fgroup; /* TODO: Use this with ceph */ 316 + struct netfs_folio *finfo; 317 + size_t fsize = folio_size(folio), flen = fsize, foff = 0; 318 + loff_t fpos = folio_pos(folio), i_size; 319 + bool to_eof = false, streamw = false; 320 + bool debug = false; 321 + 322 + _enter(""); 323 + 324 + /* netfs_perform_write() may shift i_size around the page or from out 325 + * of the page to beyond it, but cannot move i_size into or through the 326 + * page since we have it locked. 327 + */ 328 + i_size = i_size_read(wreq->inode); 329 + 330 + if (fpos >= i_size) { 331 + /* mmap beyond eof. */ 332 + _debug("beyond eof"); 333 + folio_start_writeback(folio); 334 + folio_unlock(folio); 335 + wreq->nr_group_rel += netfs_folio_written_back(folio); 336 + netfs_put_group_many(wreq->group, wreq->nr_group_rel); 337 + wreq->nr_group_rel = 0; 338 + return 0; 339 + } 340 + 341 + if (fpos + fsize > wreq->i_size) 342 + wreq->i_size = i_size; 343 + 344 + fgroup = netfs_folio_group(folio); 345 + finfo = netfs_folio_info(folio); /* A folio carrying netfs_folio info is treated as a streaming write: only the range starting at dirty_offset and dirty_len long is considered. flen holds an end offset until foff is subtracted below. */ 346 + if (finfo) { 347 + foff = finfo->dirty_offset; 348 + flen = foff + finfo->dirty_len; 349 + streamw = true; 350 + } 351 + 352 + if (wreq->origin == NETFS_WRITETHROUGH) { 353 + to_eof = false; 354 + if (flen > i_size - fpos) 355 + flen = i_size - fpos; 356 + } else if (flen > i_size - fpos) { 357 + flen = i_size - fpos; 358 + if (!streamw) 359 + folio_zero_segment(folio, flen, fsize); 360 + to_eof = true; 361 + } else if (flen == i_size - fpos) { 362 + to_eof = true; 363 + } /* Convert flen from an end offset within the folio into a length. */ 364 + flen -= foff; 365 + 366 + _debug("folio %zx %zx %zx", foff, flen, fsize); 367 + 368 + /* Deal with discontinuities in the stream of dirty pages. 
These can 369 + * arise from a number of sources: 370 + * 371 + * (1) Intervening non-dirty pages from random-access writes, multiple 372 + * flushers writing back different parts simultaneously and manual 373 + * syncing. 374 + * 375 + * (2) Partially-written pages from write-streaming. 376 + * 377 + * (3) Pages that belong to a different write-back group (eg. Ceph 378 + * snapshots). 379 + * 380 + * (4) Actually-clean pages that were marked for write to the cache 381 + * when they were read. Note that these appear as a special 382 + * write-back group. 383 + */ 384 + if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { 385 + netfs_issue_write(wreq, upload); 386 + } else if (fgroup != wreq->group) { 387 + /* We can't write this page to the server yet. */ 388 + kdebug("wrong group"); 389 + folio_redirty_for_writepage(wbc, folio); 390 + folio_unlock(folio); 391 + netfs_issue_write(wreq, upload); 392 + netfs_issue_write(wreq, cache); 393 + return 0; 394 + } 395 + /* A dirty region that does not start at the head of the folio ends the upload subrequest under construction; a streaming-write folio ends the cache one. */ 396 + if (foff > 0) 397 + netfs_issue_write(wreq, upload); 398 + if (streamw) 399 + netfs_issue_write(wreq, cache); 400 + 401 + /* Flip the page to the writeback state and unlock. If we're called 402 + * from write-through, then the page has already been put into the wb 403 + * state. 
404 + */ 405 + if (wreq->origin == NETFS_WRITEBACK) 406 + folio_start_writeback(folio); 407 + folio_unlock(folio); 408 + 409 + if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { 410 + if (!fscache_resources_valid(&wreq->cache_resources)) { 411 + trace_netfs_folio(folio, netfs_folio_trace_cancel_copy); 412 + netfs_issue_write(wreq, upload); 413 + netfs_folio_written_back(folio); 414 + return 0; 415 + } 416 + trace_netfs_folio(folio, netfs_folio_trace_store_copy); 417 + } else if (!upload->construct) { 418 + trace_netfs_folio(folio, netfs_folio_trace_store); 419 + } else { 420 + trace_netfs_folio(folio, netfs_folio_trace_store_plus); 421 + } 422 + 423 + /* Move the submission point forward to allow for write-streaming data 424 + * not starting at the front of the page. We don't do write-streaming 425 + * with the cache as the cache requires DIO alignment. 426 + * 427 + * Also skip uploading for data that's been read and just needs copying 428 + * to the cache. 429 + */ 430 + for (int s = 0; s < NR_IO_STREAMS; s++) { 431 + stream = &wreq->io_streams[s]; 432 + stream->submit_max_len = fsize; 433 + stream->submit_off = foff; 434 + stream->submit_len = flen; 435 + if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) || 436 + (stream->source == NETFS_UPLOAD_TO_SERVER && 437 + fgroup == NETFS_FOLIO_COPY_TO_CACHE)) { 438 + stream->submit_off = UINT_MAX; 439 + stream->submit_len = 0; 440 + stream->submit_max_len = 0; 441 + } 442 + } 443 + 444 + /* Attach the folio to one or more subrequests. For a big folio, we 445 + * could end up with thousands of subrequests if the wsize is small - 446 + * but we might need to wait during the creation of subrequests for 447 + * network resources (eg. SMB credits). 448 + */ 449 + for (;;) { 450 + ssize_t part; 451 + size_t lowest_off = ULONG_MAX; 452 + int choose_s = -1; 453 + 454 + /* Always add to the lowest-submitted stream first. 
*/ 455 + for (int s = 0; s < NR_IO_STREAMS; s++) { 456 + stream = &wreq->io_streams[s]; 457 + if (stream->submit_len > 0 && 458 + stream->submit_off < lowest_off) { 459 + lowest_off = stream->submit_off; 460 + choose_s = s; 461 + } 462 + } 463 + /* No stream has anything left to submit for this folio. */ 464 + if (choose_s < 0) 465 + break; 466 + stream = &wreq->io_streams[choose_s]; 467 + 468 + part = netfs_advance_write(wreq, stream, fpos + stream->submit_off, 469 + stream->submit_len, to_eof); 470 + atomic64_set(&wreq->issued_to, fpos + stream->submit_off); 471 + stream->submit_off += part; 472 + stream->submit_max_len -= part; 473 + if (part > stream->submit_len) 474 + stream->submit_len = 0; 475 + else 476 + stream->submit_len -= part; 477 + if (part > 0) 478 + debug = true; 479 + } 480 + 481 + atomic64_set(&wreq->issued_to, fpos + fsize); 482 + 483 + if (!debug) 484 + kdebug("R=%x: No submit", wreq->debug_id); 485 + /* A folio that was not submitted in full ends the subrequests under construction on every stream. */ 486 + if (flen < fsize) 487 + for (int s = 0; s < NR_IO_STREAMS; s++) 488 + netfs_issue_write(wreq, &wreq->io_streams[s]); 489 + 490 + _leave(" = 0"); 491 + return 0; 492 + } 493 + 494 + /* 495 + * Write some of the pending data back to the server 496 + */ 497 + int netfs_writepages(struct address_space *mapping, 498 + struct writeback_control *wbc) 499 + { 500 + struct netfs_inode *ictx = netfs_inode(mapping->host); 501 + struct netfs_io_request *wreq = NULL; 502 + struct folio *folio; 503 + int error = 0; 504 + /* Only WB_SYNC_ALL waits for the writeback lock; any other mode gives up and returns 0 if the lock is contended. */ 505 + if (wbc->sync_mode == WB_SYNC_ALL) 506 + mutex_lock(&ictx->wb_lock); 507 + else if (!mutex_trylock(&ictx->wb_lock)) 508 + return 0; 509 + 510 + /* Need the first folio to be able to set up the op. 
*/ 511 + folio = writeback_iter(mapping, wbc, NULL, &error); 512 + if (!folio) 513 + goto out; 514 + 515 + wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK); 516 + if (IS_ERR(wreq)) { 517 + error = PTR_ERR(wreq); 518 + goto couldnt_start; 519 + } 520 + 521 + trace_netfs_write(wreq, netfs_write_trace_writeback); 522 + netfs_stat(&netfs_n_wh_writepages); 523 + 524 + do { 525 + _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); 526 + 527 + /* It appears we don't have to handle cyclic writeback wrapping. */ 528 + WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); 529 + /* The filesystem's begin_writeback hook is called once per request, on the first folio that is not merely copy-to-cache. */ 530 + if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE && 531 + unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) { 532 + set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 533 + wreq->netfs_ops->begin_writeback(wreq); 534 + } 535 + 536 + error = netfs_write_folio(wreq, wbc, folio); 537 + if (error < 0) 538 + break; 539 + } while ((folio = writeback_iter(mapping, wbc, folio, &error))); 540 + 541 + for (int s = 0; s < NR_IO_STREAMS; s++) 542 + netfs_issue_write(wreq, &wreq->io_streams[s]); 543 + smp_wmb(); /* Write lists before ALL_QUEUED. */ 544 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 545 + 546 + mutex_unlock(&ictx->wb_lock); 547 + 548 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 549 + _leave(" = %d", error); 550 + return error; 551 + 552 + couldnt_start: /* No request could be created: discard this folio and every further one the iterator returns. */ 553 + netfs_kill_dirty_pages(mapping, wbc, folio); 554 + out: 555 + mutex_unlock(&ictx->wb_lock); 556 + _leave(" = %d", error); 557 + return error; 558 + } 559 + EXPORT_SYMBOL(netfs_writepages); 560 + 561 + /* 562 + * Begin a write operation for writing through the pagecache. 
563 + */ 564 + struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len) 565 + { 566 + struct netfs_io_request *wreq = NULL; 567 + struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp)); 568 + /* On success wb_lock is still held when this function returns; it is only dropped here on the error path. The len argument is not used in this function. */ 569 + mutex_lock(&ictx->wb_lock); 570 + 571 + wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, 572 + iocb->ki_pos, NETFS_WRITETHROUGH); 573 + if (IS_ERR(wreq)) { 574 + mutex_unlock(&ictx->wb_lock); 575 + return wreq; 576 + } 577 + 578 + wreq->io_streams[0].avail = true; 579 + trace_netfs_write(wreq, netfs_write_trace_writethrough); 580 + return wreq; 581 + } 582 + 583 + /* 584 + * Advance the state of the write operation used when writing through the 585 + * pagecache. Data has been copied into the pagecache that we need to append 586 + * to the request. If we've added more than wsize then we need to create a new 587 + * subrequest. 588 + */ 589 + int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 590 + struct folio *folio, size_t copied, bool to_page_end, 591 + struct folio **writethrough_cache) 592 + { 593 + _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", 594 + wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); 595 + 596 + if (!*writethrough_cache) { 597 + if (folio_test_dirty(folio)) 598 + /* Sigh. mmap. */ 599 + folio_clear_dirty_for_io(folio); 600 + 601 + /* We can make multiple writes to the folio... */ 602 + folio_start_writeback(folio); 603 + if (wreq->len == 0) 604 + trace_netfs_folio(folio, netfs_folio_trace_wthru); 605 + else 606 + trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); 607 + *writethrough_cache = folio; 608 + } 609 + 610 + wreq->len += copied; /* Until the copy reaches the end of the folio, leave it remembered in *writethrough_cache and issue nothing. */ 611 + if (!to_page_end) 612 + return 0; 613 + 614 + *writethrough_cache = NULL; 615 + return netfs_write_folio(wreq, wbc, folio); 616 + } 617 + 618 + /* 619 + * End a write operation used when writing through the pagecache. 
620 + */ 621 + int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 622 + struct folio *writethrough_cache) 623 + { 624 + struct netfs_inode *ictx = netfs_inode(wreq->inode); 625 + int ret; 626 + 627 + _enter("R=%x", wreq->debug_id); 628 + /* Write out a folio the caller still has pending; the return value of netfs_write_folio() is ignored here. */ 629 + if (writethrough_cache) 630 + netfs_write_folio(wreq, wbc, writethrough_cache); 631 + 632 + netfs_issue_write(wreq, &wreq->io_streams[0]); 633 + netfs_issue_write(wreq, &wreq->io_streams[1]); 634 + smp_wmb(); /* Write lists before ALL_QUEUED. */ 635 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 636 + 637 + mutex_unlock(&ictx->wb_lock); 638 + /* The request's error is read before the reference is put. */ 639 + ret = wreq->error; 640 + netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 641 + return ret; 642 + } 643 + 644 + /* 645 + * Write data to the server without going through the pagecache and without 646 + * writing it to the local cache. 647 + */ 648 + int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len) 649 + { 650 + struct netfs_io_stream *upload = &wreq->io_streams[0]; 651 + ssize_t part; 652 + loff_t start = wreq->start; 653 + int error = 0; 654 + 655 + _enter("%zx", len); 656 + 657 + if (wreq->origin == NETFS_DIO_WRITE) 658 + inode_dio_begin(wreq->inode); 659 + 660 + while (len) { 661 + // TODO: Prepare content encryption 662 + 663 + _debug("unbuffered %zx", len); 664 + part = netfs_advance_write(wreq, upload, start, len, false); 665 + start += part; 666 + len -= part; 667 + if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { 668 + trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause); 669 + wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE); 670 + } 671 + if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) 672 + break; 673 + } 674 + 675 + netfs_issue_write(wreq, upload); 676 + 677 + smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ 678 + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 679 + if (list_empty(&upload->subrequests)) 680 + netfs_wake_write_collector(wreq, false); 681 + /* NOTE(review): error is never assigned after its initialisation, so this function always returns 0 even when the loop stops on NETFS_RREQ_FAILED; may_wait is likewise unused here. */ 682 + _leave(" = %d", error); 683 + return error; 684 + }

+4 -4

fs/nfs/file.c

··· 433 433 return; 434 434 /* Cancel any unstarted writes on this page */ 435 435 nfs_wb_folio_cancel(inode, folio); 436 - folio_wait_fscache(folio); 436 + folio_wait_private_2(folio); /* [DEPRECATED] */ 437 437 trace_nfs_invalidate_folio(inode, folio); 438 438 } 439 439 ··· 500 500 dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n", 501 501 inode->i_ino, folio_pos(folio)); 502 502 503 - folio_wait_fscache(folio); 503 + folio_wait_private_2(folio); /* [DEPRECATED] */ 504 504 ret = nfs_wb_folio(inode, folio); 505 505 trace_nfs_launder_folio_done(inode, folio, ret); 506 506 return ret; ··· 593 593 sb_start_pagefault(inode->i_sb); 594 594 595 595 /* make sure the cache has finished storing the page */ 596 - if (folio_test_fscache(folio) && 597 - folio_wait_fscache_killable(folio) < 0) { 596 + if (folio_test_private_2(folio) && /* [DEPRECATED] */ 597 + folio_wait_private_2_killable(folio) < 0) { 598 598 ret = VM_FAULT_RETRY; 599 599 goto out; 600 600 }

+4 -2

fs/nfs/fscache.h

··· 81 81 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) 82 82 { 83 83 netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); 84 + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ 85 + __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags); 84 86 } 85 87 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); 86 88 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); ··· 103 101 104 102 static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) 105 103 { 106 - if (folio_test_fscache(folio)) { 104 + if (folio_test_private_2(folio)) { /* [DEPRECATED] */ 107 105 if (current_is_kswapd() || !(gfp & __GFP_FS)) 108 106 return false; 109 - folio_wait_fscache(folio); 107 + folio_wait_private_2(folio); 110 108 } 111 109 fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host))); 112 110 return true;

+2 -2

fs/nfs/write.c

··· 2120 2120 if (folio_test_private(src)) 2121 2121 return -EBUSY; 2122 2122 2123 - if (folio_test_fscache(src)) { 2123 + if (folio_test_private_2(src)) { /* [DEPRECATED] */ 2124 2124 if (mode == MIGRATE_ASYNC) 2125 2125 return -EBUSY; 2126 - folio_wait_fscache(src); 2126 + folio_wait_private_2(src); 2127 2127 } 2128 2128 2129 2129 return migrate_folio(mapping, dst, src, mode);

+1

fs/smb/client/Kconfig

··· 2 2 config CIFS 3 3 tristate "SMB3 and CIFS support (advanced network filesystem)" 4 4 depends on INET 5 + select NETFS_SUPPORT 5 6 select NLS 6 7 select NLS_UCS2_UTILS 7 8 select CRYPTO

+64 -60

fs/smb/client/cifsfs.c

··· 371 371 static struct kmem_cache *cifs_req_cachep; 372 372 static struct kmem_cache *cifs_mid_cachep; 373 373 static struct kmem_cache *cifs_sm_req_cachep; 374 + static struct kmem_cache *cifs_io_request_cachep; 375 + static struct kmem_cache *cifs_io_subrequest_cachep; 374 376 mempool_t *cifs_sm_req_poolp; 375 377 mempool_t *cifs_req_poolp; 376 378 mempool_t *cifs_mid_poolp; 379 + mempool_t cifs_io_request_pool; 380 + mempool_t cifs_io_subrequest_pool; 377 381 378 382 static struct inode * 379 383 cifs_alloc_inode(struct super_block *sb) ··· 990 986 return root; 991 987 } 992 988 993 - 994 - static ssize_t 995 - cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) 996 - { 997 - ssize_t rc; 998 - struct inode *inode = file_inode(iocb->ki_filp); 999 - 1000 - if (iocb->ki_flags & IOCB_DIRECT) 1001 - return cifs_user_readv(iocb, iter); 1002 - 1003 - rc = cifs_revalidate_mapping(inode); 1004 - if (rc) 1005 - return rc; 1006 - 1007 - return generic_file_read_iter(iocb, iter); 1008 - } 1009 - 1010 - static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 1011 - { 1012 - struct inode *inode = file_inode(iocb->ki_filp); 1013 - struct cifsInodeInfo *cinode = CIFS_I(inode); 1014 - ssize_t written; 1015 - int rc; 1016 - 1017 - if (iocb->ki_filp->f_flags & O_DIRECT) { 1018 - written = cifs_user_writev(iocb, from); 1019 - if (written > 0 && CIFS_CACHE_READ(cinode)) { 1020 - cifs_zap_mapping(inode); 1021 - cifs_dbg(FYI, 1022 - "Set no oplock for inode=%p after a write operation\n", 1023 - inode); 1024 - cinode->oplock = 0; 1025 - } 1026 - return written; 1027 - } 1028 - 1029 - written = cifs_get_writer(cinode); 1030 - if (written) 1031 - return written; 1032 - 1033 - written = generic_file_write_iter(iocb, from); 1034 - 1035 - if (CIFS_CACHE_WRITE(CIFS_I(inode))) 1036 - goto out; 1037 - 1038 - rc = filemap_fdatawrite(inode->i_mapping); 1039 - if (rc) 1040 - cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", 1041 - rc, inode); 1042 - 
1043 - out: 1044 - cifs_put_writer(cinode); 1045 - return written; 1046 - } 1047 - 1048 989 static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) 1049 990 { 1050 991 struct cifsFileInfo *cfile = file->private_data; ··· 1291 1342 rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); 1292 1343 if (rc) 1293 1344 goto unlock; 1345 + if (fend > target_cifsi->netfs.zero_point) 1346 + target_cifsi->netfs.zero_point = fend + 1; 1294 1347 1295 1348 /* Discard all the folios that overlap the destination region. */ 1296 1349 cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend); ··· 1311 1360 fscache_resize_cookie(cifs_inode_cookie(target_inode), 1312 1361 new_size); 1313 1362 } 1363 + if (rc == 0 && new_size > target_cifsi->netfs.zero_point) 1364 + target_cifsi->netfs.zero_point = new_size; 1314 1365 } 1315 1366 1316 1367 /* force revalidate of size and timestamps of target file now ··· 1404 1451 rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); 1405 1452 if (rc) 1406 1453 goto unlock; 1454 + if (fend > target_cifsi->netfs.zero_point) 1455 + target_cifsi->netfs.zero_point = fend + 1; 1407 1456 1408 1457 /* Discard all the folios that overlap the destination region. 
*/ 1409 1458 truncate_inode_pages_range(&target_inode->i_data, fstart, fend); ··· 1522 1567 }; 1523 1568 1524 1569 const struct file_operations cifs_file_direct_ops = { 1525 - .read_iter = cifs_direct_readv, 1526 - .write_iter = cifs_direct_writev, 1570 + .read_iter = netfs_unbuffered_read_iter, 1571 + .write_iter = netfs_file_write_iter, 1527 1572 .open = cifs_open, 1528 1573 .release = cifs_close, 1529 1574 .lock = cifs_lock, ··· 1578 1623 }; 1579 1624 1580 1625 const struct file_operations cifs_file_direct_nobrl_ops = { 1581 - .read_iter = cifs_direct_readv, 1582 - .write_iter = cifs_direct_writev, 1626 + .read_iter = netfs_unbuffered_read_iter, 1627 + .write_iter = netfs_file_write_iter, 1583 1628 .open = cifs_open, 1584 1629 .release = cifs_close, 1585 1630 .fsync = cifs_fsync, ··· 1754 1799 kmem_cache_destroy(cifs_mid_cachep); 1755 1800 } 1756 1801 /* Create the slab caches and the mempools built on them for cifs_io_request and cifs_io_subrequest. Returns 0 on success or -ENOMEM, unwinding whatever had already been set up. */ 1802 + static int cifs_init_netfs(void) 1803 + { 1804 + cifs_io_request_cachep = 1805 + kmem_cache_create("cifs_io_request", 1806 + sizeof(struct cifs_io_request), 0, 1807 + SLAB_HWCACHE_ALIGN, NULL); 1808 + if (!cifs_io_request_cachep) 1809 + goto nomem_req; 1810 + 1811 + if (mempool_init_slab_pool(&cifs_io_request_pool, 100, cifs_io_request_cachep) < 0) 1812 + goto nomem_reqpool; 1813 + 1814 + cifs_io_subrequest_cachep = 1815 + kmem_cache_create("cifs_io_subrequest", 1816 + sizeof(struct cifs_io_subrequest), 0, 1817 + SLAB_HWCACHE_ALIGN, NULL); 1818 + if (!cifs_io_subrequest_cachep) 1819 + goto nomem_subreq; 1820 + 1821 + if (mempool_init_slab_pool(&cifs_io_subrequest_pool, 100, cifs_io_subrequest_cachep) < 0) 1822 + goto nomem_subreqpool; 1823 + 1824 + return 0; 1825 + /* Both pools are embedded mempool_t objects set up with mempool_init_slab_pool(), so they are torn down with mempool_exit(); mempool_destroy() would additionally kfree() the pool structure itself. */ 1826 + nomem_subreqpool: 1827 + kmem_cache_destroy(cifs_io_subrequest_cachep); 1828 + nomem_subreq: 1829 + mempool_exit(&cifs_io_request_pool); 1830 + nomem_reqpool: 1831 + kmem_cache_destroy(cifs_io_request_cachep); 1832 + nomem_req: 1833 + return -ENOMEM; 1834 + } 1835 + /* Undo cifs_init_netfs(): each pool is emptied before the slab cache backing it is destroyed. */ 1836 + static void cifs_destroy_netfs(void) 1837 + { 1838 + 
mempool_exit(&cifs_io_subrequest_pool); 1839 + kmem_cache_destroy(cifs_io_subrequest_cachep); 1840 + mempool_exit(&cifs_io_request_pool); 1841 + kmem_cache_destroy(cifs_io_request_cachep); 1842 + } 1843 + 1757 1844 static int __init 1758 1845 init_cifs(void) 1759 1846 { ··· 1900 1903 if (rc) 1901 1904 goto out_destroy_deferredclose_wq; 1902 1905 1903 - rc = init_mids(); 1906 + rc = cifs_init_netfs(); 1904 1907 if (rc) 1905 1908 goto out_destroy_inodecache; 1909 + 1910 + rc = init_mids(); 1911 + if (rc) 1912 + goto out_destroy_netfs; 1906 1913 1907 1914 rc = cifs_init_request_bufs(); 1908 1915 if (rc) ··· 1962 1961 cifs_destroy_request_bufs(); 1963 1962 out_destroy_mids: 1964 1963 destroy_mids(); 1964 + out_destroy_netfs: 1965 + cifs_destroy_netfs(); 1965 1966 out_destroy_inodecache: 1966 1967 cifs_destroy_inodecache(); 1967 1968 out_destroy_deferredclose_wq: ··· 2002 1999 #endif 2003 2000 cifs_destroy_request_bufs(); 2004 2001 destroy_mids(); 2002 + cifs_destroy_netfs(); 2005 2003 cifs_destroy_inodecache(); 2006 2004 destroy_workqueue(deferredclose_wq); 2007 2005 destroy_workqueue(cifsoplockd_wq);

+3 -8

fs/smb/client/cifsfs.h

··· 69 69 extern int cifs_revalidate_dentry_attr(struct dentry *); 70 70 extern int cifs_revalidate_file(struct file *filp); 71 71 extern int cifs_revalidate_dentry(struct dentry *); 72 - extern int cifs_invalidate_mapping(struct inode *inode); 73 72 extern int cifs_revalidate_mapping(struct inode *inode); 74 73 extern int cifs_zap_mapping(struct inode *inode); 75 74 extern int cifs_getattr(struct mnt_idmap *, const struct path *, ··· 84 85 85 86 86 87 /* Functions related to files and directories */ 88 + extern const struct netfs_request_ops cifs_req_ops; 87 89 extern const struct file_operations cifs_file_ops; 88 90 extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ 89 91 extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */ ··· 94 94 extern int cifs_open(struct inode *inode, struct file *file); 95 95 extern int cifs_close(struct inode *inode, struct file *file); 96 96 extern int cifs_closedir(struct inode *inode, struct file *file); 97 - extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); 98 - extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); 99 97 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); 100 - extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); 101 - extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from); 102 98 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); 99 + ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); 100 + ssize_t cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter); 103 101 extern int cifs_flock(struct file *pfile, int cmd, struct file_lock *plock); 104 102 extern int cifs_lock(struct file *, int, struct file_lock *); 105 103 extern int cifs_fsync(struct file *, loff_t, loff_t, int); ··· 108 110 extern const struct file_operations cifs_dir_ops; 109 111 extern int cifs_dir_open(struct inode *inode, struct 
file *file); 110 112 extern int cifs_readdir(struct file *file, struct dir_context *ctx); 111 - extern void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len); 112 - extern void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len); 113 - extern void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len); 114 113 115 114 /* Functions related to dir entries */ 116 115 extern const struct dentry_operations cifs_dentry_ops;

+23 -42

fs/smb/client/cifsglob.h

··· 268 268 struct cifs_fattr; 269 269 struct smb3_fs_context; 270 270 struct cifs_fid; 271 - struct cifs_readdata; 272 - struct cifs_writedata; 271 + struct cifs_io_subrequest; 273 272 struct cifs_io_parms; 274 273 struct cifs_search_info; 275 274 struct cifsInodeInfo; ··· 449 450 /* send a flush request to the server */ 450 451 int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); 451 452 /* async read from the server */ 452 - int (*async_readv)(struct cifs_readdata *); 453 + int (*async_readv)(struct cifs_io_subrequest *); 453 454 /* async write to the server */ 454 - int (*async_writev)(struct cifs_writedata *, 455 - void (*release)(struct kref *)); 455 + void (*async_writev)(struct cifs_io_subrequest *); 456 456 /* sync read from the server */ 457 457 int (*sync_read)(const unsigned int, struct cifs_fid *, 458 458 struct cifs_io_parms *, unsigned int *, char **, ··· 546 548 /* writepages retry size */ 547 549 unsigned int (*wp_retry_size)(struct inode *); 548 550 /* get mtu credits */ 549 - int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, 550 - unsigned int *, struct cifs_credits *); 551 + int (*wait_mtu_credits)(struct TCP_Server_Info *, size_t, 552 + size_t *, struct cifs_credits *); 551 553 /* adjust previously taken mtu credits to request size */ 552 554 int (*adjust_credits)(struct TCP_Server_Info *server, 553 555 struct cifs_credits *credits, ··· 881 883 882 884 static inline void 883 885 add_credits_and_wake_if(struct TCP_Server_Info *server, 884 - const struct cifs_credits *credits, const int optype) 886 + struct cifs_credits *credits, const int optype) 885 887 { 886 888 if (credits->value) { 887 889 server->ops->add_credits(server, credits, optype); 888 890 wake_up(&server->request_q); 891 + credits->value = 0; 889 892 } 890 893 } 891 894 ··· 1491 1492 bool direct_io; 1492 1493 }; 1493 1494 1494 - /* asynchronous read support */ 1495 - struct cifs_readdata { 1496 - struct kref refcount; 1497 - struct list_head list; 
1498 - struct completion done; 1495 + struct cifs_io_request { 1496 + struct netfs_io_request rreq; 1499 1497 struct cifsFileInfo *cfile; 1500 - struct address_space *mapping; 1501 - struct cifs_aio_ctx *ctx; 1502 - __u64 offset; 1498 + }; 1499 + 1500 + /* asynchronous read support */ 1501 + struct cifs_io_subrequest { 1502 + union { 1503 + struct netfs_io_subrequest subreq; 1504 + struct netfs_io_request *rreq; 1505 + struct cifs_io_request *req; 1506 + }; 1503 1507 ssize_t got_bytes; 1504 - unsigned int bytes; 1505 1508 pid_t pid; 1509 + unsigned int xid; 1506 1510 int result; 1507 - struct work_struct work; 1508 - struct iov_iter iter; 1511 + bool have_xid; 1512 + bool replay; 1509 1513 struct kvec iov[2]; 1510 1514 struct TCP_Server_Info *server; 1511 1515 #ifdef CONFIG_CIFS_SMB_DIRECT 1512 1516 struct smbd_mr *mr; 1513 1517 #endif 1514 1518 struct cifs_credits credits; 1515 - }; 1516 - 1517 - /* asynchronous write support */ 1518 - struct cifs_writedata { 1519 - struct kref refcount; 1520 - struct list_head list; 1521 - struct completion done; 1522 - enum writeback_sync_modes sync_mode; 1523 - struct work_struct work; 1524 - struct cifsFileInfo *cfile; 1525 - struct cifs_aio_ctx *ctx; 1526 - struct iov_iter iter; 1527 - struct bio_vec *bv; 1528 - __u64 offset; 1529 - pid_t pid; 1530 - unsigned int bytes; 1531 - int result; 1532 - struct TCP_Server_Info *server; 1533 - #ifdef CONFIG_CIFS_SMB_DIRECT 1534 - struct smbd_mr *mr; 1535 - #endif 1536 - struct cifs_credits credits; 1537 - bool replay; 1538 1519 }; 1539 1520 1540 1521 /* ··· 2094 2115 extern mempool_t *cifs_sm_req_poolp; 2095 2116 extern mempool_t *cifs_req_poolp; 2096 2117 extern mempool_t *cifs_mid_poolp; 2118 + extern mempool_t cifs_io_request_pool; 2119 + extern mempool_t cifs_io_subrequest_pool; 2097 2120 2098 2121 /* Operations for different SMB versions */ 2099 2122 #define SMB1_VERSION_STRING "1.0"

+5 -7

fs/smb/client/cifsproto.h

··· 121 121 extern int cifs_check_receive(struct mid_q_entry *mid, 122 122 struct TCP_Server_Info *server, bool log_error); 123 123 extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, 124 - unsigned int size, unsigned int *num, 124 + size_t size, size_t *num, 125 125 struct cifs_credits *credits); 126 126 extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 127 127 struct kvec *, int /* nvec to send */, ··· 148 148 bool from_readdir); 149 149 extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 150 150 unsigned int bytes_written); 151 + void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, 152 + bool was_async); 151 153 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); 152 154 extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, 153 155 int flags, ··· 601 599 extern struct cifs_ses * 602 600 cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx); 603 601 604 - void cifs_readdata_release(struct kref *refcount); 605 - int cifs_async_readv(struct cifs_readdata *rdata); 602 + int cifs_async_readv(struct cifs_io_subrequest *rdata); 606 603 int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); 607 604 608 - int cifs_async_writev(struct cifs_writedata *wdata, 609 - void (*release)(struct kref *kref)); 605 + void cifs_async_writev(struct cifs_io_subrequest *wdata); 610 606 void cifs_writev_complete(struct work_struct *work); 611 - struct cifs_writedata *cifs_writedata_alloc(work_func_t complete); 612 - void cifs_writedata_release(struct kref *refcount); 613 607 int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, 614 608 struct cifs_sb_info *cifs_sb, 615 609 const unsigned char *path, char *pbuf,

+63 -57

fs/smb/client/cifssmb.c

··· 24 24 #include <linux/swap.h> 25 25 #include <linux/task_io_accounting_ops.h> 26 26 #include <linux/uaccess.h> 27 + #include <linux/netfs.h> 28 + #include <trace/events/netfs.h> 27 29 #include "cifspdu.h" 28 30 #include "cifsfs.h" 29 31 #include "cifsglob.h" ··· 1264 1262 static void 1265 1263 cifs_readv_callback(struct mid_q_entry *mid) 1266 1264 { 1267 - struct cifs_readdata *rdata = mid->callback_data; 1268 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1265 + struct cifs_io_subrequest *rdata = mid->callback_data; 1266 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 1269 1267 struct TCP_Server_Info *server = tcon->ses->server; 1270 1268 struct smb_rqst rqst = { .rq_iov = rdata->iov, 1271 1269 .rq_nvec = 2, 1272 - .rq_iter_size = iov_iter_count(&rdata->iter), 1273 - .rq_iter = rdata->iter }; 1270 + .rq_iter = rdata->subreq.io_iter }; 1274 1271 struct cifs_credits credits = { .value = 1, .instance = 0 }; 1275 1272 1276 - cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", 1273 + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", 1277 1274 __func__, mid->mid, mid->mid_state, rdata->result, 1278 - rdata->bytes); 1275 + rdata->subreq.len); 1279 1276 1280 1277 switch (mid->mid_state) { 1281 1278 case MID_RESPONSE_RECEIVED: ··· 1306 1305 rdata->result = -EIO; 1307 1306 } 1308 1307 1309 - queue_work(cifsiod_wq, &rdata->work); 1308 + if (rdata->result == 0 || rdata->result == -EAGAIN) 1309 + iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes); 1310 + rdata->credits.value = 0; 1311 + netfs_subreq_terminated(&rdata->subreq, 1312 + (rdata->result == 0 || rdata->result == -EAGAIN) ? 
1313 + rdata->got_bytes : rdata->result, 1314 + false); 1310 1315 release_mid(mid); 1311 1316 add_credits(server, &credits, 0); 1312 1317 } 1313 1318 1314 1319 /* cifs_async_readv - send an async write, and set up mid to handle result */ 1315 1320 int 1316 - cifs_async_readv(struct cifs_readdata *rdata) 1321 + cifs_async_readv(struct cifs_io_subrequest *rdata) 1317 1322 { 1318 1323 int rc; 1319 1324 READ_REQ *smb = NULL; 1320 1325 int wct; 1321 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1326 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 1322 1327 struct smb_rqst rqst = { .rq_iov = rdata->iov, 1323 1328 .rq_nvec = 2 }; 1324 1329 1325 - cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", 1326 - __func__, rdata->offset, rdata->bytes); 1330 + cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n", 1331 + __func__, rdata->subreq.start, rdata->subreq.len); 1327 1332 1328 1333 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1329 1334 wct = 12; 1330 1335 else { 1331 1336 wct = 10; /* old style read */ 1332 - if ((rdata->offset >> 32) > 0) { 1337 + if ((rdata->subreq.start >> 32) > 0) { 1333 1338 /* can not handle this big offset for old */ 1334 1339 return -EIO; 1335 1340 } ··· 1349 1342 smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); 1350 1343 1351 1344 smb->AndXCommand = 0xFF; /* none */ 1352 - smb->Fid = rdata->cfile->fid.netfid; 1353 - smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); 1345 + smb->Fid = rdata->req->cfile->fid.netfid; 1346 + smb->OffsetLow = cpu_to_le32(rdata->subreq.start & 0xFFFFFFFF); 1354 1347 if (wct == 12) 1355 - smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); 1348 + smb->OffsetHigh = cpu_to_le32(rdata->subreq.start >> 32); 1356 1349 smb->Remaining = 0; 1357 - smb->MaxCount = cpu_to_le16(rdata->bytes & 0xFFFF); 1358 - smb->MaxCountHigh = cpu_to_le32(rdata->bytes >> 16); 1350 + smb->MaxCount = cpu_to_le16(rdata->subreq.len & 0xFFFF); 1351 + smb->MaxCountHigh = cpu_to_le32(rdata->subreq.len >> 16); 1359 1352 if 
(wct == 12) 1360 1353 smb->ByteCount = 0; 1361 1354 else { ··· 1371 1364 rdata->iov[1].iov_base = (char *)smb + 4; 1372 1365 rdata->iov[1].iov_len = get_rfc1002_length(smb); 1373 1366 1374 - kref_get(&rdata->refcount); 1375 1367 rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive, 1376 1368 cifs_readv_callback, NULL, rdata, 0, NULL); 1377 1369 1378 1370 if (rc == 0) 1379 1371 cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); 1380 - else 1381 - kref_put(&rdata->refcount, cifs_readdata_release); 1382 - 1383 1372 cifs_small_buf_release(smb); 1384 1373 return rc; 1385 1374 } ··· 1618 1615 static void 1619 1616 cifs_writev_callback(struct mid_q_entry *mid) 1620 1617 { 1621 - struct cifs_writedata *wdata = mid->callback_data; 1622 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 1623 - unsigned int written; 1618 + struct cifs_io_subrequest *wdata = mid->callback_data; 1619 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 1624 1620 WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; 1625 1621 struct cifs_credits credits = { .value = 1, .instance = 0 }; 1622 + ssize_t result; 1623 + size_t written; 1626 1624 1627 1625 switch (mid->mid_state) { 1628 1626 case MID_RESPONSE_RECEIVED: 1629 - wdata->result = cifs_check_receive(mid, tcon->ses->server, 0); 1630 - if (wdata->result != 0) 1627 + result = cifs_check_receive(mid, tcon->ses->server, 0); 1628 + if (result != 0) 1631 1629 break; 1632 1630 1633 1631 written = le16_to_cpu(smb->CountHigh); ··· 1640 1636 * client. OS/2 servers are known to set incorrect 1641 1637 * CountHigh values. 
1642 1638 */ 1643 - if (written > wdata->bytes) 1639 + if (written > wdata->subreq.len) 1644 1640 written &= 0xFFFF; 1645 1641 1646 - if (written < wdata->bytes) 1647 - wdata->result = -ENOSPC; 1642 + if (written < wdata->subreq.len) 1643 + result = -ENOSPC; 1648 1644 else 1649 - wdata->bytes = written; 1645 + result = written; 1650 1646 break; 1651 1647 case MID_REQUEST_SUBMITTED: 1652 1648 case MID_RETRY_NEEDED: 1653 - wdata->result = -EAGAIN; 1649 + result = -EAGAIN; 1654 1650 break; 1655 1651 default: 1656 - wdata->result = -EIO; 1652 + result = -EIO; 1657 1653 break; 1658 1654 } 1659 1655 1660 - queue_work(cifsiod_wq, &wdata->work); 1656 + wdata->credits.value = 0; 1657 + cifs_write_subrequest_terminated(wdata, result, true); 1661 1658 release_mid(mid); 1662 1659 add_credits(tcon->ses->server, &credits, 0); 1663 1660 } 1664 1661 1665 1662 /* cifs_async_writev - send an async write, and set up mid to handle result */ 1666 - int 1667 - cifs_async_writev(struct cifs_writedata *wdata, 1668 - void (*release)(struct kref *kref)) 1663 + void 1664 + cifs_async_writev(struct cifs_io_subrequest *wdata) 1669 1665 { 1670 1666 int rc = -EACCES; 1671 1667 WRITE_REQ *smb = NULL; 1672 1668 int wct; 1673 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 1669 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 1674 1670 struct kvec iov[2]; 1675 1671 struct smb_rqst rqst = { }; 1676 1672 ··· 1678 1674 wct = 14; 1679 1675 } else { 1680 1676 wct = 12; 1681 - if (wdata->offset >> 32 > 0) { 1677 + if (wdata->subreq.start >> 32 > 0) { 1682 1678 /* can not handle big offset for old srv */ 1683 - return -EIO; 1679 + rc = -EIO; 1680 + goto out; 1684 1681 } 1685 1682 } 1686 1683 ··· 1693 1688 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); 1694 1689 1695 1690 smb->AndXCommand = 0xFF; /* none */ 1696 - smb->Fid = wdata->cfile->fid.netfid; 1697 - smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); 1691 + smb->Fid = wdata->req->cfile->fid.netfid; 1692 
+ smb->OffsetLow = cpu_to_le32(wdata->subreq.start & 0xFFFFFFFF); 1698 1693 if (wct == 14) 1699 - smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); 1694 + smb->OffsetHigh = cpu_to_le32(wdata->subreq.start >> 32); 1700 1695 smb->Reserved = 0xFFFFFFFF; 1701 1696 smb->WriteMode = 0; 1702 1697 smb->Remaining = 0; ··· 1712 1707 1713 1708 rqst.rq_iov = iov; 1714 1709 rqst.rq_nvec = 2; 1715 - rqst.rq_iter = wdata->iter; 1716 - rqst.rq_iter_size = iov_iter_count(&wdata->iter); 1710 + rqst.rq_iter = wdata->subreq.io_iter; 1711 + rqst.rq_iter_size = iov_iter_count(&wdata->subreq.io_iter); 1717 1712 1718 - cifs_dbg(FYI, "async write at %llu %u bytes\n", 1719 - wdata->offset, wdata->bytes); 1713 + cifs_dbg(FYI, "async write at %llu %zu bytes\n", 1714 + wdata->subreq.start, wdata->subreq.len); 1720 1715 1721 - smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF); 1722 - smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16); 1716 + smb->DataLengthLow = cpu_to_le16(wdata->subreq.len & 0xFFFF); 1717 + smb->DataLengthHigh = cpu_to_le16(wdata->subreq.len >> 16); 1723 1718 1724 1719 if (wct == 14) { 1725 - inc_rfc1001_len(&smb->hdr, wdata->bytes + 1); 1726 - put_bcc(wdata->bytes + 1, &smb->hdr); 1720 + inc_rfc1001_len(&smb->hdr, wdata->subreq.len + 1); 1721 + put_bcc(wdata->subreq.len + 1, &smb->hdr); 1727 1722 } else { 1728 1723 /* wct == 12 */ 1729 1724 struct smb_com_writex_req *smbw = 1730 1725 (struct smb_com_writex_req *)smb; 1731 - inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); 1732 - put_bcc(wdata->bytes + 5, &smbw->hdr); 1726 + inc_rfc1001_len(&smbw->hdr, wdata->subreq.len + 5); 1727 + put_bcc(wdata->subreq.len + 5, &smbw->hdr); 1733 1728 iov[1].iov_len += 4; /* pad bigger by four bytes */ 1734 1729 } 1735 1730 1736 - kref_get(&wdata->refcount); 1737 1731 rc = cifs_call_async(tcon->ses->server, &rqst, NULL, 1738 1732 cifs_writev_callback, NULL, wdata, 0, NULL); 1739 - 1733 + /* Can't touch wdata if rc == 0 */ 1740 1734 if (rc == 0) 1741 1735 
cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); 1742 - else 1743 - kref_put(&wdata->refcount, release); 1744 1736 1745 1737 async_writev_out: 1746 1738 cifs_small_buf_release(smb); 1747 - return rc; 1739 + out: 1740 + if (rc) { 1741 + add_credits_and_wake_if(wdata->server, &wdata->credits, 0); 1742 + cifs_write_subrequest_terminated(wdata, rc, false); 1743 + } 1748 1744 } 1749 1745 1750 1746 int

+370 -2384

fs/smb/client/file.c

··· 36 36 #include "fs_context.h" 37 37 #include "cifs_ioctl.h" 38 38 #include "cached_dir.h" 39 + #include <trace/events/netfs.h> 40 + 41 + static int cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush); 39 42 40 43 /* 41 - * Remove the dirty flags from a span of pages. 44 + * Prepare a subrequest to upload to the server. We need to allocate credits 45 + * so that we know the maximum amount of data that we can include in it. 42 46 */ 43 - static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) 47 + static void cifs_prepare_write(struct netfs_io_subrequest *subreq) 44 48 { 45 - struct address_space *mapping = inode->i_mapping; 46 - struct folio *folio; 47 - pgoff_t end; 49 + struct cifs_io_subrequest *wdata = 50 + container_of(subreq, struct cifs_io_subrequest, subreq); 51 + struct cifs_io_request *req = wdata->req; 52 + struct TCP_Server_Info *server; 53 + struct cifsFileInfo *open_file = req->cfile; 54 + size_t wsize = req->rreq.wsize; 55 + int rc; 48 56 49 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 50 - 51 - rcu_read_lock(); 52 - 53 - end = (start + len - 1) / PAGE_SIZE; 54 - xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { 55 - if (xas_retry(&xas, folio)) 56 - continue; 57 - xas_pause(&xas); 58 - rcu_read_unlock(); 59 - folio_lock(folio); 60 - folio_clear_dirty_for_io(folio); 61 - folio_unlock(folio); 62 - rcu_read_lock(); 57 + if (!wdata->have_xid) { 58 + wdata->xid = get_xid(); 59 + wdata->have_xid = true; 63 60 } 64 61 65 - rcu_read_unlock(); 62 + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 63 + wdata->server = server; 64 + 65 + retry: 66 + if (open_file->invalidHandle) { 67 + rc = cifs_reopen_file(open_file, false); 68 + if (rc < 0) { 69 + if (rc == -EAGAIN) 70 + goto retry; 71 + subreq->error = rc; 72 + return netfs_prepare_write_failed(subreq); 73 + } 74 + } 75 + 76 + rc = server->ops->wait_mtu_credits(server, wsize, &wdata->subreq.max_len, 77 + &wdata->credits); 78 + if (rc 
< 0) { 79 + subreq->error = rc; 80 + return netfs_prepare_write_failed(subreq); 81 + } 82 + 83 + #ifdef CONFIG_CIFS_SMB_DIRECT 84 + if (server->smbd_conn) 85 + subreq->max_nr_segs = server->smbd_conn->max_frmr_depth; 86 + #endif 66 87 } 67 88 68 89 /* 69 - * Completion of write to server. 90 + * Issue a subrequest to upload to the server. 70 91 */ 71 - void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) 92 + static void cifs_issue_write(struct netfs_io_subrequest *subreq) 72 93 { 73 - struct address_space *mapping = inode->i_mapping; 74 - struct folio *folio; 75 - pgoff_t end; 94 + struct cifs_io_subrequest *wdata = 95 + container_of(subreq, struct cifs_io_subrequest, subreq); 96 + struct cifs_sb_info *sbi = CIFS_SB(subreq->rreq->inode->i_sb); 97 + int rc; 76 98 77 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 99 + if (cifs_forced_shutdown(sbi)) { 100 + rc = -EIO; 101 + goto fail; 102 + } 78 103 79 - if (!len) 104 + rc = adjust_credits(wdata->server, &wdata->credits, wdata->subreq.len); 105 + if (rc) 106 + goto fail; 107 + 108 + rc = -EAGAIN; 109 + if (wdata->req->cfile->invalidHandle) 110 + goto fail; 111 + 112 + wdata->server->ops->async_writev(wdata); 113 + out: 114 + return; 115 + 116 + fail: 117 + if (rc == -EAGAIN) 118 + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); 119 + else 120 + trace_netfs_sreq(subreq, netfs_sreq_trace_fail); 121 + add_credits_and_wake_if(wdata->server, &wdata->credits, 0); 122 + cifs_write_subrequest_terminated(wdata, rc, false); 123 + goto out; 124 + } 125 + 126 + /* 127 + * Split the read up according to how many credits we can get for each piece. 128 + * It's okay to sleep here if we need to wait for more credit to become 129 + * available. 130 + * 131 + * We also choose the server and allocate an operation ID to be cleaned up 132 + * later. 
133 + */ 134 + static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) 135 + { 136 + struct netfs_io_request *rreq = subreq->rreq; 137 + struct TCP_Server_Info *server; 138 + struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); 139 + struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); 140 + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 141 + size_t rsize = 0; 142 + int rc; 143 + 144 + rdata->xid = get_xid(); 145 + rdata->have_xid = true; 146 + 147 + server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); 148 + rdata->server = server; 149 + 150 + if (cifs_sb->ctx->rsize == 0) 151 + cifs_sb->ctx->rsize = 152 + server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), 153 + cifs_sb->ctx); 154 + 155 + 156 + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, 157 + &rdata->credits); 158 + if (rc) { 159 + subreq->error = rc; 160 + return false; 161 + } 162 + 163 + subreq->len = min_t(size_t, subreq->len, rsize); 164 + #ifdef CONFIG_CIFS_SMB_DIRECT 165 + if (server->smbd_conn) 166 + subreq->max_nr_segs = server->smbd_conn->max_frmr_depth; 167 + #endif 168 + return true; 169 + } 170 + 171 + /* 172 + * Issue a read operation on behalf of the netfs helper functions. We're asked 173 + * to make a read of a certain size at a point in the file. We are permitted 174 + * to only read a portion of that, but as long as we read something, the netfs 175 + * helper will call us again so that we can issue another read. 
176 + */ 177 + static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) 178 + { 179 + struct netfs_io_request *rreq = subreq->rreq; 180 + struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); 181 + struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); 182 + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 183 + pid_t pid; 184 + int rc = 0; 185 + 186 + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 187 + pid = req->cfile->pid; 188 + else 189 + pid = current->tgid; // Ummm... This may be a workqueue 190 + 191 + cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", 192 + __func__, rreq->debug_id, subreq->debug_index, rreq->mapping, 193 + subreq->transferred, subreq->len); 194 + 195 + if (req->cfile->invalidHandle) { 196 + do { 197 + rc = cifs_reopen_file(req->cfile, true); 198 + } while (rc == -EAGAIN); 199 + if (rc) 200 + goto out; 201 + } 202 + 203 + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 204 + rdata->pid = pid; 205 + 206 + rc = adjust_credits(rdata->server, &rdata->credits, rdata->subreq.len); 207 + if (!rc) { 208 + if (rdata->req->cfile->invalidHandle) 209 + rc = -EAGAIN; 210 + else 211 + rc = rdata->server->ops->async_readv(rdata); 212 + } 213 + 214 + out: 215 + if (rc) 216 + netfs_subreq_terminated(subreq, rc, false); 217 + } 218 + 219 + /* 220 + * Writeback calls this when it finds a folio that needs uploading. This isn't 221 + * called if writeback only has copy-to-cache to deal with. 
222 + */ 223 + static void cifs_begin_writeback(struct netfs_io_request *wreq) 224 + { 225 + struct cifs_io_request *req = container_of(wreq, struct cifs_io_request, rreq); 226 + int ret; 227 + 228 + ret = cifs_get_writable_file(CIFS_I(wreq->inode), FIND_WR_ANY, &req->cfile); 229 + if (ret) { 230 + cifs_dbg(VFS, "No writable handle in writepages ret=%d\n", ret); 231 + return; 232 + } 233 + 234 + wreq->io_streams[0].avail = true; 235 + } 236 + 237 + /* 238 + * Initialise a request. 239 + */ 240 + static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) 241 + { 242 + struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq); 243 + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); 244 + struct cifsFileInfo *open_file = NULL; 245 + 246 + rreq->rsize = cifs_sb->ctx->rsize; 247 + rreq->wsize = cifs_sb->ctx->wsize; 248 + 249 + if (file) { 250 + open_file = file->private_data; 251 + rreq->netfs_priv = file->private_data; 252 + req->cfile = cifsFileInfo_get(open_file); 253 + } else if (rreq->origin != NETFS_WRITEBACK) { 254 + WARN_ON_ONCE(1); 255 + return -EIO; 256 + } 257 + 258 + return 0; 259 + } 260 + 261 + /* 262 + * Expand the size of a readahead to the size of the rsize, if at least as 263 + * large as a page, allowing for the possibility that rsize is not pow-2 264 + * aligned. 
265 + */ 266 + static void cifs_expand_readahead(struct netfs_io_request *rreq) 267 + { 268 + unsigned int rsize = rreq->rsize; 269 + loff_t misalignment, i_size = i_size_read(rreq->inode); 270 + 271 + if (rsize < PAGE_SIZE) 80 272 return; 81 273 82 - rcu_read_lock(); 274 + if (rsize < INT_MAX) 275 + rsize = roundup_pow_of_two(rsize); 276 + else 277 + rsize = ((unsigned int)INT_MAX + 1) / 2; 83 278 84 - end = (start + len - 1) / PAGE_SIZE; 85 - xas_for_each(&xas, folio, end) { 86 - if (xas_retry(&xas, folio)) 87 - continue; 88 - if (!folio_test_writeback(folio)) { 89 - WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 90 - len, start, folio->index, end); 91 - continue; 92 - } 93 - 94 - folio_detach_private(folio); 95 - folio_end_writeback(folio); 279 + misalignment = rreq->start & (rsize - 1); 280 + if (misalignment) { 281 + rreq->start -= misalignment; 282 + rreq->len += misalignment; 96 283 } 97 284 98 - rcu_read_unlock(); 285 + rreq->len = round_up(rreq->len, rsize); 286 + if (rreq->start < i_size && rreq->len > i_size - rreq->start) 287 + rreq->len = i_size - rreq->start; 99 288 } 100 289 101 290 /* 102 - * Failure of write to server. 291 + * Completion of a request operation. 
103 292 */ 104 - void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len) 293 + static void cifs_rreq_done(struct netfs_io_request *rreq) 105 294 { 106 - struct address_space *mapping = inode->i_mapping; 107 - struct folio *folio; 108 - pgoff_t end; 295 + struct timespec64 atime, mtime; 296 + struct inode *inode = rreq->inode; 109 297 110 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 111 - 112 - if (!len) 113 - return; 114 - 115 - rcu_read_lock(); 116 - 117 - end = (start + len - 1) / PAGE_SIZE; 118 - xas_for_each(&xas, folio, end) { 119 - if (xas_retry(&xas, folio)) 120 - continue; 121 - if (!folio_test_writeback(folio)) { 122 - WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 123 - len, start, folio->index, end); 124 - continue; 125 - } 126 - 127 - folio_set_error(folio); 128 - folio_end_writeback(folio); 129 - } 130 - 131 - rcu_read_unlock(); 298 + /* we do not want atime to be less than mtime, it broke some apps */ 299 + atime = inode_set_atime_to_ts(inode, current_time(inode)); 300 + mtime = inode_get_mtime(inode); 301 + if (timespec64_compare(&atime, &mtime)) 302 + inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 132 303 } 133 304 134 - /* 135 - * Redirty pages after a temporary failure. 
136 - */ 137 - void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) 305 + static void cifs_post_modify(struct inode *inode) 138 306 { 139 - struct address_space *mapping = inode->i_mapping; 140 - struct folio *folio; 141 - pgoff_t end; 307 + /* Indication to update ctime and mtime as close is deferred */ 308 + set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 309 + } 142 310 143 - XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 311 + static void cifs_free_request(struct netfs_io_request *rreq) 312 + { 313 + struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq); 144 314 145 - if (!len) 146 - return; 315 + if (req->cfile) 316 + cifsFileInfo_put(req->cfile); 317 + } 147 318 148 - rcu_read_lock(); 319 + static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) 320 + { 321 + struct cifs_io_subrequest *rdata = 322 + container_of(subreq, struct cifs_io_subrequest, subreq); 323 + int rc = subreq->error; 149 324 150 - end = (start + len - 1) / PAGE_SIZE; 151 - xas_for_each(&xas, folio, end) { 152 - if (!folio_test_writeback(folio)) { 153 - WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 154 - len, start, folio->index, end); 155 - continue; 325 + if (rdata->subreq.source == NETFS_DOWNLOAD_FROM_SERVER) { 326 + #ifdef CONFIG_CIFS_SMB_DIRECT 327 + if (rdata->mr) { 328 + smbd_deregister_mr(rdata->mr); 329 + rdata->mr = NULL; 156 330 } 157 - 158 - filemap_dirty_folio(folio->mapping, folio); 159 - folio_end_writeback(folio); 331 + #endif 160 332 } 161 333 162 - rcu_read_unlock(); 334 + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); 335 + if (rdata->have_xid) 336 + free_xid(rdata->xid); 163 337 } 338 + 339 + const struct netfs_request_ops cifs_req_ops = { 340 + .request_pool = &cifs_io_request_pool, 341 + .subrequest_pool = &cifs_io_subrequest_pool, 342 + .init_request = cifs_init_request, 343 + .free_request = cifs_free_request, 344 + .free_subrequest = cifs_free_subrequest, 345 + 
.expand_readahead = cifs_expand_readahead, 346 + .clamp_length = cifs_clamp_length, 347 + .issue_read = cifs_req_issue_read, 348 + .done = cifs_rreq_done, 349 + .post_modify = cifs_post_modify, 350 + .begin_writeback = cifs_begin_writeback, 351 + .prepare_write = cifs_prepare_write, 352 + .issue_write = cifs_issue_write, 353 + }; 164 354 165 355 /* 166 356 * Mark as invalid, all open files on tree connections since they ··· 2397 2207 return rc; 2398 2208 } 2399 2209 2400 - /* 2401 - * update the file size (if needed) after a write. Should be called with 2402 - * the inode->i_lock held 2403 - */ 2404 - void 2405 - cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2406 - unsigned int bytes_written) 2210 + void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, 2211 + bool was_async) 2407 2212 { 2408 - loff_t end_of_write = offset + bytes_written; 2213 + struct netfs_io_request *wreq = wdata->rreq; 2214 + loff_t new_server_eof; 2409 2215 2410 - if (end_of_write > cifsi->netfs.remote_i_size) 2411 - netfs_resize_file(&cifsi->netfs, end_of_write, true); 2412 - } 2216 + if (result > 0) { 2217 + new_server_eof = wdata->subreq.start + wdata->subreq.transferred + result; 2413 2218 2414 - static ssize_t 2415 - cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2416 - size_t write_size, loff_t *offset) 2417 - { 2418 - int rc = 0; 2419 - unsigned int bytes_written = 0; 2420 - unsigned int total_written; 2421 - struct cifs_tcon *tcon; 2422 - struct TCP_Server_Info *server; 2423 - unsigned int xid; 2424 - struct dentry *dentry = open_file->dentry; 2425 - struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2426 - struct cifs_io_parms io_parms = {0}; 2427 - 2428 - cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2429 - write_size, *offset, dentry); 2430 - 2431 - tcon = tlink_tcon(open_file->tlink); 2432 - server = tcon->ses->server; 2433 - 2434 - if (!server->ops->sync_write) 2435 - return -ENOSYS; 2436 
- 2437 - xid = get_xid(); 2438 - 2439 - for (total_written = 0; write_size > total_written; 2440 - total_written += bytes_written) { 2441 - rc = -EAGAIN; 2442 - while (rc == -EAGAIN) { 2443 - struct kvec iov[2]; 2444 - unsigned int len; 2445 - 2446 - if (open_file->invalidHandle) { 2447 - /* we could deadlock if we called 2448 - filemap_fdatawait from here so tell 2449 - reopen_file not to flush data to 2450 - server now */ 2451 - rc = cifs_reopen_file(open_file, false); 2452 - if (rc != 0) 2453 - break; 2454 - } 2455 - 2456 - len = min(server->ops->wp_retry_size(d_inode(dentry)), 2457 - (unsigned int)write_size - total_written); 2458 - /* iov[0] is reserved for smb header */ 2459 - iov[1].iov_base = (char *)write_data + total_written; 2460 - iov[1].iov_len = len; 2461 - io_parms.pid = pid; 2462 - io_parms.tcon = tcon; 2463 - io_parms.offset = *offset; 2464 - io_parms.length = len; 2465 - rc = server->ops->sync_write(xid, &open_file->fid, 2466 - &io_parms, &bytes_written, iov, 1); 2467 - } 2468 - if (rc || (bytes_written == 0)) { 2469 - if (total_written) 2470 - break; 2471 - else { 2472 - free_xid(xid); 2473 - return rc; 2474 - } 2475 - } else { 2476 - spin_lock(&d_inode(dentry)->i_lock); 2477 - cifs_update_eof(cifsi, *offset, bytes_written); 2478 - spin_unlock(&d_inode(dentry)->i_lock); 2479 - *offset += bytes_written; 2480 - } 2219 + if (new_server_eof > netfs_inode(wreq->inode)->remote_i_size) 2220 + netfs_resize_file(netfs_inode(wreq->inode), new_server_eof, true); 2481 2221 } 2482 2222 2483 - cifs_stats_bytes_written(tcon, total_written); 2484 - 2485 - if (total_written > 0) { 2486 - spin_lock(&d_inode(dentry)->i_lock); 2487 - if (*offset > d_inode(dentry)->i_size) { 2488 - i_size_write(d_inode(dentry), *offset); 2489 - d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2490 - } 2491 - spin_unlock(&d_inode(dentry)->i_lock); 2492 - } 2493 - mark_inode_dirty_sync(d_inode(dentry)); 2494 - free_xid(xid); 2495 - return total_written; 2223 + 
netfs_write_subrequest_terminated(&wdata->subreq, result, was_async); 2496 2224 } 2497 2225 2498 2226 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, ··· 2617 2509 return -ENOENT; 2618 2510 } 2619 2511 2620 - void 2621 - cifs_writedata_release(struct kref *refcount) 2622 - { 2623 - struct cifs_writedata *wdata = container_of(refcount, 2624 - struct cifs_writedata, refcount); 2625 - #ifdef CONFIG_CIFS_SMB_DIRECT 2626 - if (wdata->mr) { 2627 - smbd_deregister_mr(wdata->mr); 2628 - wdata->mr = NULL; 2629 - } 2630 - #endif 2631 - 2632 - if (wdata->cfile) 2633 - cifsFileInfo_put(wdata->cfile); 2634 - 2635 - kfree(wdata); 2636 - } 2637 - 2638 2512 /* 2639 - * Write failed with a retryable error. Resend the write request. It's also 2640 - * possible that the page was redirtied so re-clean the page. 2513 + * Flush data on a strict file. 2641 2514 */ 2642 - static void 2643 - cifs_writev_requeue(struct cifs_writedata *wdata) 2644 - { 2645 - int rc = 0; 2646 - struct inode *inode = d_inode(wdata->cfile->dentry); 2647 - struct TCP_Server_Info *server; 2648 - unsigned int rest_len = wdata->bytes; 2649 - loff_t fpos = wdata->offset; 2650 - 2651 - server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2652 - do { 2653 - struct cifs_writedata *wdata2; 2654 - unsigned int wsize, cur_len; 2655 - 2656 - wsize = server->ops->wp_retry_size(inode); 2657 - if (wsize < rest_len) { 2658 - if (wsize < PAGE_SIZE) { 2659 - rc = -EOPNOTSUPP; 2660 - break; 2661 - } 2662 - cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2663 - } else { 2664 - cur_len = rest_len; 2665 - } 2666 - 2667 - wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2668 - if (!wdata2) { 2669 - rc = -ENOMEM; 2670 - break; 2671 - } 2672 - 2673 - wdata2->sync_mode = wdata->sync_mode; 2674 - wdata2->offset = fpos; 2675 - wdata2->bytes = cur_len; 2676 - wdata2->iter = wdata->iter; 2677 - 2678 - iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2679 - iov_iter_truncate(&wdata2->iter, 
wdata2->bytes); 2680 - 2681 - if (iov_iter_is_xarray(&wdata2->iter)) 2682 - /* Check for pages having been redirtied and clean 2683 - * them. We can do this by walking the xarray. If 2684 - * it's not an xarray, then it's a DIO and we shouldn't 2685 - * be mucking around with the page bits. 2686 - */ 2687 - cifs_undirty_folios(inode, fpos, cur_len); 2688 - 2689 - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2690 - &wdata2->cfile); 2691 - if (!wdata2->cfile) { 2692 - cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2693 - rc); 2694 - if (!is_retryable_error(rc)) 2695 - rc = -EBADF; 2696 - } else { 2697 - wdata2->pid = wdata2->cfile->pid; 2698 - rc = server->ops->async_writev(wdata2, 2699 - cifs_writedata_release); 2700 - } 2701 - 2702 - kref_put(&wdata2->refcount, cifs_writedata_release); 2703 - if (rc) { 2704 - if (is_retryable_error(rc)) 2705 - continue; 2706 - fpos += cur_len; 2707 - rest_len -= cur_len; 2708 - break; 2709 - } 2710 - 2711 - fpos += cur_len; 2712 - rest_len -= cur_len; 2713 - } while (rest_len > 0); 2714 - 2715 - /* Clean up remaining pages from the original wdata */ 2716 - if (iov_iter_is_xarray(&wdata->iter)) 2717 - cifs_pages_write_failed(inode, fpos, rest_len); 2718 - 2719 - if (rc != 0 && !is_retryable_error(rc)) 2720 - mapping_set_error(inode->i_mapping, rc); 2721 - kref_put(&wdata->refcount, cifs_writedata_release); 2722 - } 2723 - 2724 - void 2725 - cifs_writev_complete(struct work_struct *work) 2726 - { 2727 - struct cifs_writedata *wdata = container_of(work, 2728 - struct cifs_writedata, work); 2729 - struct inode *inode = d_inode(wdata->cfile->dentry); 2730 - 2731 - if (wdata->result == 0) { 2732 - spin_lock(&inode->i_lock); 2733 - cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2734 - spin_unlock(&inode->i_lock); 2735 - cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2736 - wdata->bytes); 2737 - } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2738 - return 
cifs_writev_requeue(wdata); 2739 - 2740 - if (wdata->result == -EAGAIN) 2741 - cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2742 - else if (wdata->result < 0) 2743 - cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2744 - else 2745 - cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2746 - 2747 - if (wdata->result != -EAGAIN) 2748 - mapping_set_error(inode->i_mapping, wdata->result); 2749 - kref_put(&wdata->refcount, cifs_writedata_release); 2750 - } 2751 - 2752 - struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2753 - { 2754 - struct cifs_writedata *wdata; 2755 - 2756 - wdata = kzalloc(sizeof(*wdata), GFP_NOFS); 2757 - if (wdata != NULL) { 2758 - kref_init(&wdata->refcount); 2759 - INIT_LIST_HEAD(&wdata->list); 2760 - init_completion(&wdata->done); 2761 - INIT_WORK(&wdata->work, complete); 2762 - } 2763 - return wdata; 2764 - } 2765 - 2766 - static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2767 - { 2768 - struct address_space *mapping = page->mapping; 2769 - loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2770 - char *write_data; 2771 - int rc = -EFAULT; 2772 - int bytes_written = 0; 2773 - struct inode *inode; 2774 - struct cifsFileInfo *open_file; 2775 - 2776 - if (!mapping || !mapping->host) 2777 - return -EFAULT; 2778 - 2779 - inode = page->mapping->host; 2780 - 2781 - offset += (loff_t)from; 2782 - write_data = kmap(page); 2783 - write_data += from; 2784 - 2785 - if ((to > PAGE_SIZE) || (from > to)) { 2786 - kunmap(page); 2787 - return -EIO; 2788 - } 2789 - 2790 - /* racing with truncate? 
*/ 2791 - if (offset > mapping->host->i_size) { 2792 - kunmap(page); 2793 - return 0; /* don't care */ 2794 - } 2795 - 2796 - /* check to make sure that we are not extending the file */ 2797 - if (mapping->host->i_size - offset < (loff_t)to) 2798 - to = (unsigned)(mapping->host->i_size - offset); 2799 - 2800 - rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2801 - &open_file); 2802 - if (!rc) { 2803 - bytes_written = cifs_write(open_file, open_file->pid, 2804 - write_data, to - from, &offset); 2805 - cifsFileInfo_put(open_file); 2806 - /* Does mm or vfs already set times? */ 2807 - simple_inode_init_ts(inode); 2808 - if ((bytes_written > 0) && (offset)) 2809 - rc = 0; 2810 - else if (bytes_written < 0) 2811 - rc = bytes_written; 2812 - else 2813 - rc = -EFAULT; 2814 - } else { 2815 - cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2816 - if (!is_retryable_error(rc)) 2817 - rc = -EIO; 2818 - } 2819 - 2820 - kunmap(page); 2821 - return rc; 2822 - } 2823 - 2824 - /* 2825 - * Extend the region to be written back to include subsequent contiguously 2826 - * dirty pages if possible, but don't sleep while doing so. 2827 - */ 2828 - static void cifs_extend_writeback(struct address_space *mapping, 2829 - struct xa_state *xas, 2830 - long *_count, 2831 - loff_t start, 2832 - int max_pages, 2833 - loff_t max_len, 2834 - size_t *_len) 2835 - { 2836 - struct folio_batch batch; 2837 - struct folio *folio; 2838 - unsigned int nr_pages; 2839 - pgoff_t index = (start + *_len) / PAGE_SIZE; 2840 - size_t len; 2841 - bool stop = true; 2842 - unsigned int i; 2843 - 2844 - folio_batch_init(&batch); 2845 - 2846 - do { 2847 - /* Firstly, we gather up a batch of contiguous dirty pages 2848 - * under the RCU read lock - but we can't clear the dirty flags 2849 - * there if any of those pages are mapped. 
2850 - */ 2851 - rcu_read_lock(); 2852 - 2853 - xas_for_each(xas, folio, ULONG_MAX) { 2854 - stop = true; 2855 - if (xas_retry(xas, folio)) 2856 - continue; 2857 - if (xa_is_value(folio)) 2858 - break; 2859 - if (folio->index != index) { 2860 - xas_reset(xas); 2861 - break; 2862 - } 2863 - 2864 - if (!folio_try_get_rcu(folio)) { 2865 - xas_reset(xas); 2866 - continue; 2867 - } 2868 - nr_pages = folio_nr_pages(folio); 2869 - if (nr_pages > max_pages) { 2870 - xas_reset(xas); 2871 - break; 2872 - } 2873 - 2874 - /* Has the page moved or been split? */ 2875 - if (unlikely(folio != xas_reload(xas))) { 2876 - folio_put(folio); 2877 - xas_reset(xas); 2878 - break; 2879 - } 2880 - 2881 - if (!folio_trylock(folio)) { 2882 - folio_put(folio); 2883 - xas_reset(xas); 2884 - break; 2885 - } 2886 - if (!folio_test_dirty(folio) || 2887 - folio_test_writeback(folio)) { 2888 - folio_unlock(folio); 2889 - folio_put(folio); 2890 - xas_reset(xas); 2891 - break; 2892 - } 2893 - 2894 - max_pages -= nr_pages; 2895 - len = folio_size(folio); 2896 - stop = false; 2897 - 2898 - index += nr_pages; 2899 - *_count -= nr_pages; 2900 - *_len += len; 2901 - if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2902 - stop = true; 2903 - 2904 - if (!folio_batch_add(&batch, folio)) 2905 - break; 2906 - if (stop) 2907 - break; 2908 - } 2909 - 2910 - xas_pause(xas); 2911 - rcu_read_unlock(); 2912 - 2913 - /* Now, if we obtained any pages, we can shift them to being 2914 - * writable and mark them for caching. 2915 - */ 2916 - if (!folio_batch_count(&batch)) 2917 - break; 2918 - 2919 - for (i = 0; i < folio_batch_count(&batch); i++) { 2920 - folio = batch.folios[i]; 2921 - /* The folio should be locked, dirty and not undergoing 2922 - * writeback from the loop above. 
2923 - */ 2924 - if (!folio_clear_dirty_for_io(folio)) 2925 - WARN_ON(1); 2926 - folio_start_writeback(folio); 2927 - folio_unlock(folio); 2928 - } 2929 - 2930 - folio_batch_release(&batch); 2931 - cond_resched(); 2932 - } while (!stop); 2933 - } 2934 - 2935 - /* 2936 - * Write back the locked page and any subsequent non-locked dirty pages. 2937 - */ 2938 - static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2939 - struct writeback_control *wbc, 2940 - struct xa_state *xas, 2941 - struct folio *folio, 2942 - unsigned long long start, 2943 - unsigned long long end) 2944 - { 2945 - struct inode *inode = mapping->host; 2946 - struct TCP_Server_Info *server; 2947 - struct cifs_writedata *wdata; 2948 - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2949 - struct cifs_credits credits_on_stack; 2950 - struct cifs_credits *credits = &credits_on_stack; 2951 - struct cifsFileInfo *cfile = NULL; 2952 - unsigned long long i_size = i_size_read(inode), max_len; 2953 - unsigned int xid, wsize; 2954 - size_t len = folio_size(folio); 2955 - long count = wbc->nr_to_write; 2956 - int rc; 2957 - 2958 - /* The folio should be locked, dirty and not undergoing writeback. 
*/ 2959 - if (!folio_clear_dirty_for_io(folio)) 2960 - WARN_ON_ONCE(1); 2961 - folio_start_writeback(folio); 2962 - 2963 - count -= folio_nr_pages(folio); 2964 - 2965 - xid = get_xid(); 2966 - server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2967 - 2968 - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2969 - if (rc) { 2970 - cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2971 - goto err_xid; 2972 - } 2973 - 2974 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2975 - &wsize, credits); 2976 - if (rc != 0) 2977 - goto err_close; 2978 - 2979 - wdata = cifs_writedata_alloc(cifs_writev_complete); 2980 - if (!wdata) { 2981 - rc = -ENOMEM; 2982 - goto err_uncredit; 2983 - } 2984 - 2985 - wdata->sync_mode = wbc->sync_mode; 2986 - wdata->offset = folio_pos(folio); 2987 - wdata->pid = cfile->pid; 2988 - wdata->credits = credits_on_stack; 2989 - wdata->cfile = cfile; 2990 - wdata->server = server; 2991 - cfile = NULL; 2992 - 2993 - /* Find all consecutive lockable dirty pages that have contiguous 2994 - * written regions, stopping when we find a page that is not 2995 - * immediately lockable, is not dirty or is missing, or we reach the 2996 - * end of the range. 2997 - */ 2998 - if (start < i_size) { 2999 - /* Trim the write to the EOF; the extra data is ignored. Also 3000 - * put an upper limit on the size of a single storedata op. 
3001 - */ 3002 - max_len = wsize; 3003 - max_len = min_t(unsigned long long, max_len, end - start + 1); 3004 - max_len = min_t(unsigned long long, max_len, i_size - start); 3005 - 3006 - if (len < max_len) { 3007 - int max_pages = INT_MAX; 3008 - 3009 - #ifdef CONFIG_CIFS_SMB_DIRECT 3010 - if (server->smbd_conn) 3011 - max_pages = server->smbd_conn->max_frmr_depth; 3012 - #endif 3013 - max_pages -= folio_nr_pages(folio); 3014 - 3015 - if (max_pages > 0) 3016 - cifs_extend_writeback(mapping, xas, &count, start, 3017 - max_pages, max_len, &len); 3018 - } 3019 - } 3020 - len = min_t(unsigned long long, len, i_size - start); 3021 - 3022 - /* We now have a contiguous set of dirty pages, each with writeback 3023 - * set; the first page is still locked at this point, but all the rest 3024 - * have been unlocked. 3025 - */ 3026 - folio_unlock(folio); 3027 - wdata->bytes = len; 3028 - 3029 - if (start < i_size) { 3030 - iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 3031 - start, len); 3032 - 3033 - rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 3034 - if (rc) 3035 - goto err_wdata; 3036 - 3037 - if (wdata->cfile->invalidHandle) 3038 - rc = -EAGAIN; 3039 - else 3040 - rc = wdata->server->ops->async_writev(wdata, 3041 - cifs_writedata_release); 3042 - if (rc >= 0) { 3043 - kref_put(&wdata->refcount, cifs_writedata_release); 3044 - goto err_close; 3045 - } 3046 - } else { 3047 - /* The dirty region was entirely beyond the EOF. 
*/ 3048 - cifs_pages_written_back(inode, start, len); 3049 - rc = 0; 3050 - } 3051 - 3052 - err_wdata: 3053 - kref_put(&wdata->refcount, cifs_writedata_release); 3054 - err_uncredit: 3055 - add_credits_and_wake_if(server, credits, 0); 3056 - err_close: 3057 - if (cfile) 3058 - cifsFileInfo_put(cfile); 3059 - err_xid: 3060 - free_xid(xid); 3061 - if (rc == 0) { 3062 - wbc->nr_to_write = count; 3063 - rc = len; 3064 - } else if (is_retryable_error(rc)) { 3065 - cifs_pages_write_redirty(inode, start, len); 3066 - } else { 3067 - cifs_pages_write_failed(inode, start, len); 3068 - mapping_set_error(mapping, rc); 3069 - } 3070 - /* Indication to update ctime and mtime as close is deferred */ 3071 - set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3072 - return rc; 3073 - } 3074 - 3075 - /* 3076 - * write a region of pages back to the server 3077 - */ 3078 - static ssize_t cifs_writepages_begin(struct address_space *mapping, 3079 - struct writeback_control *wbc, 3080 - struct xa_state *xas, 3081 - unsigned long long *_start, 3082 - unsigned long long end) 3083 - { 3084 - struct folio *folio; 3085 - unsigned long long start = *_start; 3086 - ssize_t ret; 3087 - int skips = 0; 3088 - 3089 - search_again: 3090 - /* Find the first dirty page. 
*/ 3091 - rcu_read_lock(); 3092 - 3093 - for (;;) { 3094 - folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 3095 - if (xas_retry(xas, folio) || xa_is_value(folio)) 3096 - continue; 3097 - if (!folio) 3098 - break; 3099 - 3100 - if (!folio_try_get_rcu(folio)) { 3101 - xas_reset(xas); 3102 - continue; 3103 - } 3104 - 3105 - if (unlikely(folio != xas_reload(xas))) { 3106 - folio_put(folio); 3107 - xas_reset(xas); 3108 - continue; 3109 - } 3110 - 3111 - xas_pause(xas); 3112 - break; 3113 - } 3114 - rcu_read_unlock(); 3115 - if (!folio) 3116 - return 0; 3117 - 3118 - start = folio_pos(folio); /* May regress with THPs */ 3119 - 3120 - /* At this point we hold neither the i_pages lock nor the page lock: 3121 - * the page may be truncated or invalidated (changing page->mapping to 3122 - * NULL), or even swizzled back from swapper_space to tmpfs file 3123 - * mapping 3124 - */ 3125 - lock_again: 3126 - if (wbc->sync_mode != WB_SYNC_NONE) { 3127 - ret = folio_lock_killable(folio); 3128 - if (ret < 0) 3129 - return ret; 3130 - } else { 3131 - if (!folio_trylock(folio)) 3132 - goto search_again; 3133 - } 3134 - 3135 - if (folio->mapping != mapping || 3136 - !folio_test_dirty(folio)) { 3137 - start += folio_size(folio); 3138 - folio_unlock(folio); 3139 - goto search_again; 3140 - } 3141 - 3142 - if (folio_test_writeback(folio) || 3143 - folio_test_fscache(folio)) { 3144 - folio_unlock(folio); 3145 - if (wbc->sync_mode != WB_SYNC_NONE) { 3146 - folio_wait_writeback(folio); 3147 - #ifdef CONFIG_CIFS_FSCACHE 3148 - folio_wait_fscache(folio); 3149 - #endif 3150 - goto lock_again; 3151 - } 3152 - 3153 - start += folio_size(folio); 3154 - if (wbc->sync_mode == WB_SYNC_NONE) { 3155 - if (skips >= 5 || need_resched()) { 3156 - ret = 0; 3157 - goto out; 3158 - } 3159 - skips++; 3160 - } 3161 - goto search_again; 3162 - } 3163 - 3164 - ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3165 - out: 3166 - if (ret > 0) 3167 - *_start = start + 
ret; 3168 - return ret; 3169 - } 3170 - 3171 - /* 3172 - * Write a region of pages back to the server 3173 - */ 3174 - static int cifs_writepages_region(struct address_space *mapping, 3175 - struct writeback_control *wbc, 3176 - unsigned long long *_start, 3177 - unsigned long long end) 3178 - { 3179 - ssize_t ret; 3180 - 3181 - XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3182 - 3183 - do { 3184 - ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3185 - if (ret > 0 && wbc->nr_to_write > 0) 3186 - cond_resched(); 3187 - } while (ret > 0 && wbc->nr_to_write > 0); 3188 - 3189 - return ret > 0 ? 0 : ret; 3190 - } 3191 - 3192 - /* 3193 - * Write some of the pending data back to the server 3194 - */ 3195 - static int cifs_writepages(struct address_space *mapping, 3196 - struct writeback_control *wbc) 3197 - { 3198 - loff_t start, end; 3199 - int ret; 3200 - 3201 - /* We have to be careful as we can end up racing with setattr() 3202 - * truncating the pagecache since the caller doesn't take a lock here 3203 - * to prevent it. 
3204 - */ 3205 - 3206 - if (wbc->range_cyclic && mapping->writeback_index) { 3207 - start = mapping->writeback_index * PAGE_SIZE; 3208 - ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3209 - if (ret < 0) 3210 - goto out; 3211 - 3212 - if (wbc->nr_to_write <= 0) { 3213 - mapping->writeback_index = start / PAGE_SIZE; 3214 - goto out; 3215 - } 3216 - 3217 - start = 0; 3218 - end = mapping->writeback_index * PAGE_SIZE; 3219 - mapping->writeback_index = 0; 3220 - ret = cifs_writepages_region(mapping, wbc, &start, end); 3221 - if (ret == 0) 3222 - mapping->writeback_index = start / PAGE_SIZE; 3223 - } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3224 - start = 0; 3225 - ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3226 - if (wbc->nr_to_write > 0 && ret == 0) 3227 - mapping->writeback_index = start / PAGE_SIZE; 3228 - } else { 3229 - start = wbc->range_start; 3230 - ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3231 - } 3232 - 3233 - out: 3234 - return ret; 3235 - } 3236 - 3237 - static int 3238 - cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3239 - { 3240 - int rc; 3241 - unsigned int xid; 3242 - 3243 - xid = get_xid(); 3244 - /* BB add check for wbc flags */ 3245 - get_page(page); 3246 - if (!PageUptodate(page)) 3247 - cifs_dbg(FYI, "ppw - page not up to date\n"); 3248 - 3249 - /* 3250 - * Set the "writeback" flag, and clear "dirty" in the radix tree. 3251 - * 3252 - * A writepage() implementation always needs to do either this, 3253 - * or re-dirty the page with "redirty_page_for_writepage()" in 3254 - * the case of a failure. 3255 - * 3256 - * Just unlocking the page will cause the radix tree tag-bits 3257 - * to fail to update with the state of the page correctly. 
3258 - */ 3259 - set_page_writeback(page); 3260 - retry_write: 3261 - rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3262 - if (is_retryable_error(rc)) { 3263 - if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3264 - goto retry_write; 3265 - redirty_page_for_writepage(wbc, page); 3266 - } else if (rc != 0) { 3267 - SetPageError(page); 3268 - mapping_set_error(page->mapping, rc); 3269 - } else { 3270 - SetPageUptodate(page); 3271 - } 3272 - end_page_writeback(page); 3273 - put_page(page); 3274 - free_xid(xid); 3275 - return rc; 3276 - } 3277 - 3278 - static int cifs_write_end(struct file *file, struct address_space *mapping, 3279 - loff_t pos, unsigned len, unsigned copied, 3280 - struct page *page, void *fsdata) 3281 - { 3282 - int rc; 3283 - struct inode *inode = mapping->host; 3284 - struct cifsFileInfo *cfile = file->private_data; 3285 - struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3286 - struct folio *folio = page_folio(page); 3287 - __u32 pid; 3288 - 3289 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3290 - pid = cfile->pid; 3291 - else 3292 - pid = current->tgid; 3293 - 3294 - cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3295 - page, pos, copied); 3296 - 3297 - if (folio_test_checked(folio)) { 3298 - if (copied == len) 3299 - folio_mark_uptodate(folio); 3300 - folio_clear_checked(folio); 3301 - } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3302 - folio_mark_uptodate(folio); 3303 - 3304 - if (!folio_test_uptodate(folio)) { 3305 - char *page_data; 3306 - unsigned offset = pos & (PAGE_SIZE - 1); 3307 - unsigned int xid; 3308 - 3309 - xid = get_xid(); 3310 - /* this is probably better than directly calling 3311 - partialpage_write since in this function the file handle is 3312 - known which we might as well leverage */ 3313 - /* BB check if anything else missing out of ppw 3314 - such as updating last write time */ 3315 - page_data = kmap(page); 3316 - rc = cifs_write(cfile, pid, page_data + 
offset, copied, &pos); 3317 - /* if (rc < 0) should we set writebehind rc? */ 3318 - kunmap(page); 3319 - 3320 - free_xid(xid); 3321 - } else { 3322 - rc = copied; 3323 - pos += copied; 3324 - set_page_dirty(page); 3325 - } 3326 - 3327 - if (rc > 0) { 3328 - spin_lock(&inode->i_lock); 3329 - if (pos > inode->i_size) { 3330 - loff_t additional_blocks = (512 - 1 + copied) >> 9; 3331 - 3332 - i_size_write(inode, pos); 3333 - /* 3334 - * Estimate new allocation size based on the amount written. 3335 - * This will be updated from server on close (and on queryinfo) 3336 - */ 3337 - inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3338 - inode->i_blocks + additional_blocks); 3339 - } 3340 - spin_unlock(&inode->i_lock); 3341 - } 3342 - 3343 - unlock_page(page); 3344 - put_page(page); 3345 - /* Indication to update ctime and mtime as close is deferred */ 3346 - set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3347 - 3348 - return rc; 3349 - } 3350 - 3351 2515 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3352 2516 int datasync) 3353 2517 { ··· 2674 3294 return rc; 2675 3295 } 2676 3296 3297 + /* 3298 + * Flush data on a non-strict file.
3299 + */ 2677 3300 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 2678 3301 { 2679 3302 unsigned int xid; ··· 2743 3360 return rc; 2744 3361 } 2745 3362 2746 - static void 2747 - cifs_uncached_writedata_release(struct kref *refcount) 2748 - { 2749 - struct cifs_writedata *wdata = container_of(refcount, 2750 - struct cifs_writedata, refcount); 2751 - 2752 - kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 2753 - cifs_writedata_release(refcount); 2754 - } 2755 - 2756 - static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 2757 - 2758 - static void 2759 - cifs_uncached_writev_complete(struct work_struct *work) 2760 - { 2761 - struct cifs_writedata *wdata = container_of(work, 2762 - struct cifs_writedata, work); 2763 - struct inode *inode = d_inode(wdata->cfile->dentry); 2764 - struct cifsInodeInfo *cifsi = CIFS_I(inode); 2765 - 2766 - spin_lock(&inode->i_lock); 2767 - cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 2768 - if (cifsi->netfs.remote_i_size > inode->i_size) 2769 - i_size_write(inode, cifsi->netfs.remote_i_size); 2770 - spin_unlock(&inode->i_lock); 2771 - 2772 - complete(&wdata->done); 2773 - collect_uncached_write_data(wdata->ctx); 2774 - /* the below call can possibly free the last ref to aio ctx */ 2775 - kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2776 - } 2777 - 2778 - static int 2779 - cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 2780 - struct cifs_aio_ctx *ctx) 2781 - { 2782 - unsigned int wsize; 2783 - struct cifs_credits credits; 2784 - int rc; 2785 - struct TCP_Server_Info *server = wdata->server; 2786 - 2787 - do { 2788 - if (wdata->cfile->invalidHandle) { 2789 - rc = cifs_reopen_file(wdata->cfile, false); 2790 - if (rc == -EAGAIN) 2791 - continue; 2792 - else if (rc) 2793 - break; 2794 - } 2795 - 2796 - 2797 - /* 2798 - * Wait for credits to resend this wdata. 
2799 - * Note: we are attempting to resend the whole wdata not in 2800 - * segments 2801 - */ 2802 - do { 2803 - rc = server->ops->wait_mtu_credits(server, wdata->bytes, 2804 - &wsize, &credits); 2805 - if (rc) 2806 - goto fail; 2807 - 2808 - if (wsize < wdata->bytes) { 2809 - add_credits_and_wake_if(server, &credits, 0); 2810 - msleep(1000); 2811 - } 2812 - } while (wsize < wdata->bytes); 2813 - wdata->credits = credits; 2814 - 2815 - rc = adjust_credits(server, &wdata->credits, wdata->bytes); 2816 - 2817 - if (!rc) { 2818 - if (wdata->cfile->invalidHandle) 2819 - rc = -EAGAIN; 2820 - else { 2821 - wdata->replay = true; 2822 - #ifdef CONFIG_CIFS_SMB_DIRECT 2823 - if (wdata->mr) { 2824 - wdata->mr->need_invalidate = true; 2825 - smbd_deregister_mr(wdata->mr); 2826 - wdata->mr = NULL; 2827 - } 2828 - #endif 2829 - rc = server->ops->async_writev(wdata, 2830 - cifs_uncached_writedata_release); 2831 - } 2832 - } 2833 - 2834 - /* If the write was successfully sent, we are done */ 2835 - if (!rc) { 2836 - list_add_tail(&wdata->list, wdata_list); 2837 - return 0; 2838 - } 2839 - 2840 - /* Roll back credits and retry if needed */ 2841 - add_credits_and_wake_if(server, &wdata->credits, 0); 2842 - } while (rc == -EAGAIN); 2843 - 2844 - fail: 2845 - kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2846 - return rc; 2847 - } 2848 - 2849 - /* 2850 - * Select span of a bvec iterator we're going to use. Limit it by both maximum 2851 - * size and maximum number of segments. 
2852 - */ 2853 - static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 2854 - size_t max_segs, unsigned int *_nsegs) 2855 - { 2856 - const struct bio_vec *bvecs = iter->bvec; 2857 - unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 2858 - size_t len, span = 0, n = iter->count; 2859 - size_t skip = iter->iov_offset; 2860 - 2861 - if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 2862 - return 0; 2863 - 2864 - while (n && ix < nbv && skip) { 2865 - len = bvecs[ix].bv_len; 2866 - if (skip < len) 2867 - break; 2868 - skip -= len; 2869 - n -= len; 2870 - ix++; 2871 - } 2872 - 2873 - while (n && ix < nbv) { 2874 - len = min3(n, bvecs[ix].bv_len - skip, max_size); 2875 - span += len; 2876 - max_size -= len; 2877 - nsegs++; 2878 - ix++; 2879 - if (max_size == 0 || nsegs >= max_segs) 2880 - break; 2881 - skip = 0; 2882 - n -= len; 2883 - } 2884 - 2885 - *_nsegs = nsegs; 2886 - return span; 2887 - } 2888 - 2889 - static int 2890 - cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 2891 - struct cifsFileInfo *open_file, 2892 - struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 2893 - struct cifs_aio_ctx *ctx) 2894 - { 2895 - int rc = 0; 2896 - size_t cur_len, max_len; 2897 - struct cifs_writedata *wdata; 2898 - pid_t pid; 2899 - struct TCP_Server_Info *server; 2900 - unsigned int xid, max_segs = INT_MAX; 2901 - 2902 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2903 - pid = open_file->pid; 2904 - else 2905 - pid = current->tgid; 2906 - 2907 - server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 2908 - xid = get_xid(); 2909 - 2910 - #ifdef CONFIG_CIFS_SMB_DIRECT 2911 - if (server->smbd_conn) 2912 - max_segs = server->smbd_conn->max_frmr_depth; 2913 - #endif 2914 - 2915 - do { 2916 - struct cifs_credits credits_on_stack; 2917 - struct cifs_credits *credits = &credits_on_stack; 2918 - unsigned int wsize, nsegs = 0; 2919 - 2920 - if (signal_pending(current)) { 2921 - rc = -EINTR; 2922 - break; 2923 - } 
2924 - 2925 - if (open_file->invalidHandle) { 2926 - rc = cifs_reopen_file(open_file, false); 2927 - if (rc == -EAGAIN) 2928 - continue; 2929 - else if (rc) 2930 - break; 2931 - } 2932 - 2933 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2934 - &wsize, credits); 2935 - if (rc) 2936 - break; 2937 - 2938 - max_len = min_t(const size_t, len, wsize); 2939 - if (!max_len) { 2940 - rc = -EAGAIN; 2941 - add_credits_and_wake_if(server, credits, 0); 2942 - break; 2943 - } 2944 - 2945 - cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 2946 - cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 2947 - cur_len, max_len, nsegs, from->nr_segs, max_segs); 2948 - if (cur_len == 0) { 2949 - rc = -EIO; 2950 - add_credits_and_wake_if(server, credits, 0); 2951 - break; 2952 - } 2953 - 2954 - wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 2955 - if (!wdata) { 2956 - rc = -ENOMEM; 2957 - add_credits_and_wake_if(server, credits, 0); 2958 - break; 2959 - } 2960 - 2961 - wdata->sync_mode = WB_SYNC_ALL; 2962 - wdata->offset = (__u64)fpos; 2963 - wdata->cfile = cifsFileInfo_get(open_file); 2964 - wdata->server = server; 2965 - wdata->pid = pid; 2966 - wdata->bytes = cur_len; 2967 - wdata->credits = credits_on_stack; 2968 - wdata->iter = *from; 2969 - wdata->ctx = ctx; 2970 - kref_get(&ctx->refcount); 2971 - 2972 - iov_iter_truncate(&wdata->iter, cur_len); 2973 - 2974 - rc = adjust_credits(server, &wdata->credits, wdata->bytes); 2975 - 2976 - if (!rc) { 2977 - if (wdata->cfile->invalidHandle) 2978 - rc = -EAGAIN; 2979 - else 2980 - rc = server->ops->async_writev(wdata, 2981 - cifs_uncached_writedata_release); 2982 - } 2983 - 2984 - if (rc) { 2985 - add_credits_and_wake_if(server, &wdata->credits, 0); 2986 - kref_put(&wdata->refcount, 2987 - cifs_uncached_writedata_release); 2988 - if (rc == -EAGAIN) 2989 - continue; 2990 - break; 2991 - } 2992 - 2993 - list_add_tail(&wdata->list, wdata_list); 2994 - iov_iter_advance(from, cur_len); 
2995 - fpos += cur_len; 2996 - len -= cur_len; 2997 - } while (len > 0); 2998 - 2999 - free_xid(xid); 3000 - return rc; 3001 - } 3002 - 3003 - static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3004 - { 3005 - struct cifs_writedata *wdata, *tmp; 3006 - struct cifs_tcon *tcon; 3007 - struct cifs_sb_info *cifs_sb; 3008 - struct dentry *dentry = ctx->cfile->dentry; 3009 - ssize_t rc; 3010 - 3011 - tcon = tlink_tcon(ctx->cfile->tlink); 3012 - cifs_sb = CIFS_SB(dentry->d_sb); 3013 - 3014 - mutex_lock(&ctx->aio_mutex); 3015 - 3016 - if (list_empty(&ctx->list)) { 3017 - mutex_unlock(&ctx->aio_mutex); 3018 - return; 3019 - } 3020 - 3021 - rc = ctx->rc; 3022 - /* 3023 - * Wait for and collect replies for any successful sends in order of 3024 - * increasing offset. Once an error is hit, then return without waiting 3025 - * for any more replies. 3026 - */ 3027 - restart_loop: 3028 - list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3029 - if (!rc) { 3030 - if (!try_wait_for_completion(&wdata->done)) { 3031 - mutex_unlock(&ctx->aio_mutex); 3032 - return; 3033 - } 3034 - 3035 - if (wdata->result) 3036 - rc = wdata->result; 3037 - else 3038 - ctx->total_len += wdata->bytes; 3039 - 3040 - /* resend call if it's a retryable error */ 3041 - if (rc == -EAGAIN) { 3042 - struct list_head tmp_list; 3043 - struct iov_iter tmp_from = ctx->iter; 3044 - 3045 - INIT_LIST_HEAD(&tmp_list); 3046 - list_del_init(&wdata->list); 3047 - 3048 - if (ctx->direct_io) 3049 - rc = cifs_resend_wdata( 3050 - wdata, &tmp_list, ctx); 3051 - else { 3052 - iov_iter_advance(&tmp_from, 3053 - wdata->offset - ctx->pos); 3054 - 3055 - rc = cifs_write_from_iter(wdata->offset, 3056 - wdata->bytes, &tmp_from, 3057 - ctx->cfile, cifs_sb, &tmp_list, 3058 - ctx); 3059 - 3060 - kref_put(&wdata->refcount, 3061 - cifs_uncached_writedata_release); 3062 - } 3063 - 3064 - list_splice(&tmp_list, &ctx->list); 3065 - goto restart_loop; 3066 - } 3067 - } 3068 - list_del_init(&wdata->list); 3069 - 
kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3070 - } 3071 - 3072 - cifs_stats_bytes_written(tcon, ctx->total_len); 3073 - set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3074 - 3075 - ctx->rc = (rc == 0) ? ctx->total_len : rc; 3076 - 3077 - mutex_unlock(&ctx->aio_mutex); 3078 - 3079 - if (ctx->iocb && ctx->iocb->ki_complete) 3080 - ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3081 - else 3082 - complete(&ctx->done); 3083 - } 3084 - 3085 - static ssize_t __cifs_writev( 3086 - struct kiocb *iocb, struct iov_iter *from, bool direct) 3087 - { 3088 - struct file *file = iocb->ki_filp; 3089 - ssize_t total_written = 0; 3090 - struct cifsFileInfo *cfile; 3091 - struct cifs_tcon *tcon; 3092 - struct cifs_sb_info *cifs_sb; 3093 - struct cifs_aio_ctx *ctx; 3094 - int rc; 3095 - 3096 - rc = generic_write_checks(iocb, from); 3097 - if (rc <= 0) 3098 - return rc; 3099 - 3100 - cifs_sb = CIFS_FILE_SB(file); 3101 - cfile = file->private_data; 3102 - tcon = tlink_tcon(cfile->tlink); 3103 - 3104 - if (!tcon->ses->server->ops->async_writev) 3105 - return -ENOSYS; 3106 - 3107 - ctx = cifs_aio_ctx_alloc(); 3108 - if (!ctx) 3109 - return -ENOMEM; 3110 - 3111 - ctx->cfile = cifsFileInfo_get(cfile); 3112 - 3113 - if (!is_sync_kiocb(iocb)) 3114 - ctx->iocb = iocb; 3115 - 3116 - ctx->pos = iocb->ki_pos; 3117 - ctx->direct_io = direct; 3118 - ctx->nr_pinned_pages = 0; 3119 - 3120 - if (user_backed_iter(from)) { 3121 - /* 3122 - * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3123 - * they contain references to the calling process's virtual 3124 - * memory layout which won't be available in an async worker 3125 - * thread. This also takes a pin on every folio involved. 
3126 - */ 3127 - rc = netfs_extract_user_iter(from, iov_iter_count(from), 3128 - &ctx->iter, 0); 3129 - if (rc < 0) { 3130 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3131 - return rc; 3132 - } 3133 - 3134 - ctx->nr_pinned_pages = rc; 3135 - ctx->bv = (void *)ctx->iter.bvec; 3136 - ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3137 - } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3138 - !is_sync_kiocb(iocb)) { 3139 - /* 3140 - * If the op is asynchronous, we need to copy the list attached 3141 - * to a BVEC/KVEC-type iterator, but we assume that the storage 3142 - * will be pinned by the caller; in any case, we may or may not 3143 - * be able to pin the pages, so we don't try. 3144 - */ 3145 - ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3146 - if (!ctx->bv) { 3147 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3148 - return -ENOMEM; 3149 - } 3150 - } else { 3151 - /* 3152 - * Otherwise, we just pass the iterator down as-is and rely on 3153 - * the caller to make sure the pages referred to by the 3154 - * iterator don't evaporate. 3155 - */ 3156 - ctx->iter = *from; 3157 - } 3158 - 3159 - ctx->len = iov_iter_count(&ctx->iter); 3160 - 3161 - /* grab a lock here due to read response handlers can access ctx */ 3162 - mutex_lock(&ctx->aio_mutex); 3163 - 3164 - rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3165 - cfile, cifs_sb, &ctx->list, ctx); 3166 - 3167 - /* 3168 - * If at least one write was successfully sent, then discard any rc 3169 - * value from the later writes. If the other write succeeds, then 3170 - * we'll end up returning whatever was written. If it fails, then 3171 - * we'll get a new rc value from that. 
3172 - */ 3173 - if (!list_empty(&ctx->list)) 3174 - rc = 0; 3175 - 3176 - mutex_unlock(&ctx->aio_mutex); 3177 - 3178 - if (rc) { 3179 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3180 - return rc; 3181 - } 3182 - 3183 - if (!is_sync_kiocb(iocb)) { 3184 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3185 - return -EIOCBQUEUED; 3186 - } 3187 - 3188 - rc = wait_for_completion_killable(&ctx->done); 3189 - if (rc) { 3190 - mutex_lock(&ctx->aio_mutex); 3191 - ctx->rc = rc = -EINTR; 3192 - total_written = ctx->total_len; 3193 - mutex_unlock(&ctx->aio_mutex); 3194 - } else { 3195 - rc = ctx->rc; 3196 - total_written = ctx->total_len; 3197 - } 3198 - 3199 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3200 - 3201 - if (unlikely(!total_written)) 3202 - return rc; 3203 - 3204 - iocb->ki_pos += total_written; 3205 - return total_written; 3206 - } 3207 - 3208 - ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3209 - { 3210 - struct file *file = iocb->ki_filp; 3211 - 3212 - cifs_revalidate_mapping(file->f_inode); 3213 - return __cifs_writev(iocb, from, true); 3214 - } 3215 - 3216 - ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3217 - { 3218 - return __cifs_writev(iocb, from, false); 3219 - } 3220 - 3221 3363 static ssize_t 3222 3364 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3223 3365 { ··· 2753 3845 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 2754 3846 ssize_t rc; 2755 3847 2756 - inode_lock(inode); 3848 + rc = netfs_start_io_write(inode); 3849 + if (rc < 0) 3850 + return rc; 3851 + 2757 3852 /* 2758 3853 * We need to hold the sem to be sure nobody modifies lock list 2759 3854 * with a brlock that prevents writing. 
··· 2770 3859 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 2771 3860 server->vals->exclusive_lock_type, 0, 2772 3861 NULL, CIFS_WRITE_OP)) 2773 - rc = __generic_file_write_iter(iocb, from); 3862 + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); 2774 3863 else 2775 3864 rc = -EACCES; 2776 3865 out: 2777 3866 up_read(&cinode->lock_sem); 2778 - inode_unlock(inode); 2779 - 3867 + netfs_end_io_write(inode); 2780 3868 if (rc > 0) 2781 3869 rc = generic_write_sync(iocb, rc); 2782 3870 return rc; ··· 2798 3888 2799 3889 if (CIFS_CACHE_WRITE(cinode)) { 2800 3890 if (cap_unix(tcon->ses) && 2801 - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 2802 - && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 2803 - written = generic_file_write_iter(iocb, from); 3891 + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 3892 + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3893 + written = netfs_file_write_iter(iocb, from); 2804 3894 goto out; 2805 3895 } 2806 3896 written = cifs_writev(iocb, from); ··· 2812 3902 * affected pages because it may cause a error with mandatory locks on 2813 3903 * these pages but not on the region from pos to ppos+len-1. 
2814 3904 */ 2815 - written = cifs_user_writev(iocb, from); 3905 + written = netfs_file_write_iter(iocb, from); 2816 3906 if (CIFS_CACHE_READ(cinode)) { 2817 3907 /* 2818 3908 * We have read level caching and we have just sent a write ··· 2831 3921 return written; 2832 3922 } 2833 3923 2834 - static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3924 + ssize_t cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) 2835 3925 { 2836 - struct cifs_readdata *rdata; 3926 + ssize_t rc; 3927 + struct inode *inode = file_inode(iocb->ki_filp); 2837 3928 2838 - rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 2839 - if (rdata) { 2840 - kref_init(&rdata->refcount); 2841 - INIT_LIST_HEAD(&rdata->list); 2842 - init_completion(&rdata->done); 2843 - INIT_WORK(&rdata->work, complete); 2844 - } 3929 + if (iocb->ki_flags & IOCB_DIRECT) 3930 + return netfs_unbuffered_read_iter(iocb, iter); 2845 3931 2846 - return rdata; 2847 - } 2848 - 2849 - void 2850 - cifs_readdata_release(struct kref *refcount) 2851 - { 2852 - struct cifs_readdata *rdata = container_of(refcount, 2853 - struct cifs_readdata, refcount); 2854 - 2855 - if (rdata->ctx) 2856 - kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 2857 - #ifdef CONFIG_CIFS_SMB_DIRECT 2858 - if (rdata->mr) { 2859 - smbd_deregister_mr(rdata->mr); 2860 - rdata->mr = NULL; 2861 - } 2862 - #endif 2863 - if (rdata->cfile) 2864 - cifsFileInfo_put(rdata->cfile); 2865 - 2866 - kfree(rdata); 2867 - } 2868 - 2869 - static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 2870 - 2871 - static void 2872 - cifs_uncached_readv_complete(struct work_struct *work) 2873 - { 2874 - struct cifs_readdata *rdata = container_of(work, 2875 - struct cifs_readdata, work); 2876 - 2877 - complete(&rdata->done); 2878 - collect_uncached_read_data(rdata->ctx); 2879 - /* the below call can possibly free the last ref to aio ctx */ 2880 - kref_put(&rdata->refcount, cifs_readdata_release); 2881 - } 2882 - 2883 - static int 
cifs_resend_rdata(struct cifs_readdata *rdata, 2884 - struct list_head *rdata_list, 2885 - struct cifs_aio_ctx *ctx) 2886 - { 2887 - unsigned int rsize; 2888 - struct cifs_credits credits; 2889 - int rc; 2890 - struct TCP_Server_Info *server; 2891 - 2892 - /* XXX: should we pick a new channel here? */ 2893 - server = rdata->server; 2894 - 2895 - do { 2896 - if (rdata->cfile->invalidHandle) { 2897 - rc = cifs_reopen_file(rdata->cfile, true); 2898 - if (rc == -EAGAIN) 2899 - continue; 2900 - else if (rc) 2901 - break; 2902 - } 2903 - 2904 - /* 2905 - * Wait for credits to resend this rdata. 2906 - * Note: we are attempting to resend the whole rdata not in 2907 - * segments 2908 - */ 2909 - do { 2910 - rc = server->ops->wait_mtu_credits(server, rdata->bytes, 2911 - &rsize, &credits); 2912 - 2913 - if (rc) 2914 - goto fail; 2915 - 2916 - if (rsize < rdata->bytes) { 2917 - add_credits_and_wake_if(server, &credits, 0); 2918 - msleep(1000); 2919 - } 2920 - } while (rsize < rdata->bytes); 2921 - rdata->credits = credits; 2922 - 2923 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 2924 - if (!rc) { 2925 - if (rdata->cfile->invalidHandle) 2926 - rc = -EAGAIN; 2927 - else { 2928 - #ifdef CONFIG_CIFS_SMB_DIRECT 2929 - if (rdata->mr) { 2930 - rdata->mr->need_invalidate = true; 2931 - smbd_deregister_mr(rdata->mr); 2932 - rdata->mr = NULL; 2933 - } 2934 - #endif 2935 - rc = server->ops->async_readv(rdata); 2936 - } 2937 - } 2938 - 2939 - /* If the read was successfully sent, we are done */ 2940 - if (!rc) { 2941 - /* Add to aio pending list */ 2942 - list_add_tail(&rdata->list, rdata_list); 2943 - return 0; 2944 - } 2945 - 2946 - /* Roll back credits and retry if needed */ 2947 - add_credits_and_wake_if(server, &rdata->credits, 0); 2948 - } while (rc == -EAGAIN); 2949 - 2950 - fail: 2951 - kref_put(&rdata->refcount, cifs_readdata_release); 2952 - return rc; 2953 - } 2954 - 2955 - static int 2956 - cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo 
*open_file, 2957 - struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 2958 - struct cifs_aio_ctx *ctx) 2959 - { 2960 - struct cifs_readdata *rdata; 2961 - unsigned int rsize, nsegs, max_segs = INT_MAX; 2962 - struct cifs_credits credits_on_stack; 2963 - struct cifs_credits *credits = &credits_on_stack; 2964 - size_t cur_len, max_len; 2965 - int rc; 2966 - pid_t pid; 2967 - struct TCP_Server_Info *server; 2968 - 2969 - server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 2970 - 2971 - #ifdef CONFIG_CIFS_SMB_DIRECT 2972 - if (server->smbd_conn) 2973 - max_segs = server->smbd_conn->max_frmr_depth; 2974 - #endif 2975 - 2976 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2977 - pid = open_file->pid; 2978 - else 2979 - pid = current->tgid; 2980 - 2981 - do { 2982 - if (open_file->invalidHandle) { 2983 - rc = cifs_reopen_file(open_file, true); 2984 - if (rc == -EAGAIN) 2985 - continue; 2986 - else if (rc) 2987 - break; 2988 - } 2989 - 2990 - if (cifs_sb->ctx->rsize == 0) 2991 - cifs_sb->ctx->rsize = 2992 - server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 2993 - cifs_sb->ctx); 2994 - 2995 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 2996 - &rsize, credits); 2997 - if (rc) 2998 - break; 2999 - 3000 - max_len = min_t(size_t, len, rsize); 3001 - 3002 - cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 3003 - max_segs, &nsegs); 3004 - cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3005 - cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 3006 - if (cur_len == 0) { 3007 - rc = -EIO; 3008 - add_credits_and_wake_if(server, credits, 0); 3009 - break; 3010 - } 3011 - 3012 - rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 3013 - if (!rdata) { 3014 - add_credits_and_wake_if(server, credits, 0); 3015 - rc = -ENOMEM; 3016 - break; 3017 - } 3018 - 3019 - rdata->server = server; 3020 - rdata->cfile = cifsFileInfo_get(open_file); 3021 - rdata->offset = fpos; 3022 - rdata->bytes = cur_len; 3023 - 
rdata->pid = pid; 3024 - rdata->credits = credits_on_stack; 3025 - rdata->ctx = ctx; 3026 - kref_get(&ctx->refcount); 3027 - 3028 - rdata->iter = ctx->iter; 3029 - iov_iter_truncate(&rdata->iter, cur_len); 3030 - 3031 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3032 - 3033 - if (!rc) { 3034 - if (rdata->cfile->invalidHandle) 3035 - rc = -EAGAIN; 3036 - else 3037 - rc = server->ops->async_readv(rdata); 3038 - } 3039 - 3040 - if (rc) { 3041 - add_credits_and_wake_if(server, &rdata->credits, 0); 3042 - kref_put(&rdata->refcount, cifs_readdata_release); 3043 - if (rc == -EAGAIN) 3044 - continue; 3045 - break; 3046 - } 3047 - 3048 - list_add_tail(&rdata->list, rdata_list); 3049 - iov_iter_advance(&ctx->iter, cur_len); 3050 - fpos += cur_len; 3051 - len -= cur_len; 3052 - } while (len > 0); 3053 - 3054 - return rc; 3055 - } 3056 - 3057 - static void 3058 - collect_uncached_read_data(struct cifs_aio_ctx *ctx) 3059 - { 3060 - struct cifs_readdata *rdata, *tmp; 3061 - struct cifs_sb_info *cifs_sb; 3062 - int rc; 3063 - 3064 - cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 3065 - 3066 - mutex_lock(&ctx->aio_mutex); 3067 - 3068 - if (list_empty(&ctx->list)) { 3069 - mutex_unlock(&ctx->aio_mutex); 3070 - return; 3071 - } 3072 - 3073 - rc = ctx->rc; 3074 - /* the loop below should proceed in the order of increasing offsets */ 3075 - again: 3076 - list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 3077 - if (!rc) { 3078 - if (!try_wait_for_completion(&rdata->done)) { 3079 - mutex_unlock(&ctx->aio_mutex); 3080 - return; 3081 - } 3082 - 3083 - if (rdata->result == -EAGAIN) { 3084 - /* resend call if it's a retryable error */ 3085 - struct list_head tmp_list; 3086 - unsigned int got_bytes = rdata->got_bytes; 3087 - 3088 - list_del_init(&rdata->list); 3089 - INIT_LIST_HEAD(&tmp_list); 3090 - 3091 - if (ctx->direct_io) { 3092 - /* 3093 - * Re-use rdata as this is a 3094 - * direct I/O 3095 - */ 3096 - rc = cifs_resend_rdata( 3097 - rdata, 3098 - &tmp_list, ctx); 
3099 - } else { 3100 - rc = cifs_send_async_read( 3101 - rdata->offset + got_bytes, 3102 - rdata->bytes - got_bytes, 3103 - rdata->cfile, cifs_sb, 3104 - &tmp_list, ctx); 3105 - 3106 - kref_put(&rdata->refcount, 3107 - cifs_readdata_release); 3108 - } 3109 - 3110 - list_splice(&tmp_list, &ctx->list); 3111 - 3112 - goto again; 3113 - } else if (rdata->result) 3114 - rc = rdata->result; 3115 - 3116 - /* if there was a short read -- discard anything left */ 3117 - if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 3118 - rc = -ENODATA; 3119 - 3120 - ctx->total_len += rdata->got_bytes; 3121 - } 3122 - list_del_init(&rdata->list); 3123 - kref_put(&rdata->refcount, cifs_readdata_release); 3124 - } 3125 - 3126 - /* mask nodata case */ 3127 - if (rc == -ENODATA) 3128 - rc = 0; 3129 - 3130 - ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc; 3131 - 3132 - mutex_unlock(&ctx->aio_mutex); 3133 - 3134 - if (ctx->iocb && ctx->iocb->ki_complete) 3135 - ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3136 - else 3137 - complete(&ctx->done); 3138 - } 3139 - 3140 - static ssize_t __cifs_readv( 3141 - struct kiocb *iocb, struct iov_iter *to, bool direct) 3142 - { 3143 - size_t len; 3144 - struct file *file = iocb->ki_filp; 3145 - struct cifs_sb_info *cifs_sb; 3146 - struct cifsFileInfo *cfile; 3147 - struct cifs_tcon *tcon; 3148 - ssize_t rc, total_read = 0; 3149 - loff_t offset = iocb->ki_pos; 3150 - struct cifs_aio_ctx *ctx; 3151 - 3152 - len = iov_iter_count(to); 3153 - if (!len) 3154 - return 0; 3155 - 3156 - cifs_sb = CIFS_FILE_SB(file); 3157 - cfile = file->private_data; 3158 - tcon = tlink_tcon(cfile->tlink); 3159 - 3160 - if (!tcon->ses->server->ops->async_readv) 3161 - return -ENOSYS; 3162 - 3163 - if ((file->f_flags & O_ACCMODE) == O_WRONLY) 3164 - cifs_dbg(FYI, "attempting read on write only file instance\n"); 3165 - 3166 - ctx = cifs_aio_ctx_alloc(); 3167 - if (!ctx) 3168 - return -ENOMEM; 3169 - 3170 - ctx->pos = offset; 3171 - ctx->direct_io = direct; 3172 - ctx->len 
= len; 3173 - ctx->cfile = cifsFileInfo_get(cfile); 3174 - ctx->nr_pinned_pages = 0; 3175 - 3176 - if (!is_sync_kiocb(iocb)) 3177 - ctx->iocb = iocb; 3178 - 3179 - if (user_backed_iter(to)) { 3180 - /* 3181 - * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3182 - * they contain references to the calling process's virtual 3183 - * memory layout which won't be available in an async worker 3184 - * thread. This also takes a pin on every folio involved. 3185 - */ 3186 - rc = netfs_extract_user_iter(to, iov_iter_count(to), 3187 - &ctx->iter, 0); 3188 - if (rc < 0) { 3189 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3190 - return rc; 3191 - } 3192 - 3193 - ctx->nr_pinned_pages = rc; 3194 - ctx->bv = (void *)ctx->iter.bvec; 3195 - ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 3196 - ctx->should_dirty = true; 3197 - } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 3198 - !is_sync_kiocb(iocb)) { 3199 - /* 3200 - * If the op is asynchronous, we need to copy the list attached 3201 - * to a BVEC/KVEC-type iterator, but we assume that the storage 3202 - * will be retained by the caller; in any case, we may or may 3203 - * not be able to pin the pages, so we don't try. 3204 - */ 3205 - ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 3206 - if (!ctx->bv) { 3207 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3208 - return -ENOMEM; 3209 - } 3210 - } else { 3211 - /* 3212 - * Otherwise, we just pass the iterator down as-is and rely on 3213 - * the caller to make sure the pages referred to by the 3214 - * iterator don't evaporate. 
3215 - */ 3216 - ctx->iter = *to; 3217 - } 3218 - 3219 - if (direct) { 3220 - rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 3221 - offset, offset + len - 1); 3222 - if (rc) { 3223 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3224 - return -EAGAIN; 3225 - } 3226 - } 3227 - 3228 - /* grab a lock here due to read response handlers can access ctx */ 3229 - mutex_lock(&ctx->aio_mutex); 3230 - 3231 - rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 3232 - 3233 - /* if at least one read request send succeeded, then reset rc */ 3234 - if (!list_empty(&ctx->list)) 3235 - rc = 0; 3236 - 3237 - mutex_unlock(&ctx->aio_mutex); 3238 - 3239 - if (rc) { 3240 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3932 + rc = cifs_revalidate_mapping(inode); 3933 + if (rc) 3241 3934 return rc; 3242 - } 3243 3935 3244 - if (!is_sync_kiocb(iocb)) { 3245 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3246 - return -EIOCBQUEUED; 3247 - } 3248 - 3249 - rc = wait_for_completion_killable(&ctx->done); 3250 - if (rc) { 3251 - mutex_lock(&ctx->aio_mutex); 3252 - ctx->rc = rc = -EINTR; 3253 - total_read = ctx->total_len; 3254 - mutex_unlock(&ctx->aio_mutex); 3255 - } else { 3256 - rc = ctx->rc; 3257 - total_read = ctx->total_len; 3258 - } 3259 - 3260 - kref_put(&ctx->refcount, cifs_aio_ctx_release); 3261 - 3262 - if (total_read) { 3263 - iocb->ki_pos += total_read; 3264 - return total_read; 3265 - } 3266 - return rc; 3936 + return netfs_file_read_iter(iocb, iter); 3267 3937 } 3268 3938 3269 - ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 3939 + ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 3270 3940 { 3271 - return __cifs_readv(iocb, to, true); 3272 - } 3941 + struct inode *inode = file_inode(iocb->ki_filp); 3942 + struct cifsInodeInfo *cinode = CIFS_I(inode); 3943 + ssize_t written; 3944 + int rc; 3273 3945 3274 - ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 3275 - { 3276 - return 
__cifs_readv(iocb, to, false); 3946 + if (iocb->ki_filp->f_flags & O_DIRECT) { 3947 + written = netfs_unbuffered_write_iter(iocb, from); 3948 + if (written > 0 && CIFS_CACHE_READ(cinode)) { 3949 + cifs_zap_mapping(inode); 3950 + cifs_dbg(FYI, 3951 + "Set no oplock for inode=%p after a write operation\n", 3952 + inode); 3953 + cinode->oplock = 0; 3954 + } 3955 + return written; 3956 + } 3957 + 3958 + written = cifs_get_writer(cinode); 3959 + if (written) 3960 + return written; 3961 + 3962 + written = netfs_file_write_iter(iocb, from); 3963 + 3964 + if (!CIFS_CACHE_WRITE(CIFS_I(inode))) { 3965 + rc = filemap_fdatawrite(inode->i_mapping); 3966 + if (rc) 3967 + cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", 3968 + rc, inode); 3969 + } 3970 + 3971 + cifs_put_writer(cinode); 3972 + return written; 3277 3973 } 3278 3974 3279 3975 ssize_t ··· 2902 4386 * pos+len-1. 2903 4387 */ 2904 4388 if (!CIFS_CACHE_READ(cinode)) 2905 - return cifs_user_readv(iocb, to); 4389 + return netfs_unbuffered_read_iter(iocb, to); 2906 4390 2907 4391 if (cap_unix(tcon->ses) && 2908 4392 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2909 - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2910 - return generic_file_read_iter(iocb, to); 4393 + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 4394 + if (iocb->ki_flags & IOCB_DIRECT) 4395 + return netfs_unbuffered_read_iter(iocb, to); 4396 + return netfs_buffered_read_iter(iocb, to); 4397 + } 2911 4398 2912 4399 /* 2913 4400 * We need to hold the sem to be sure nobody modifies lock list ··· 2919 4400 down_read(&cinode->lock_sem); 2920 4401 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 2921 4402 tcon->ses->server->vals->shared_lock_type, 2922 - 0, NULL, CIFS_READ_OP)) 2923 - rc = generic_file_read_iter(iocb, to); 4403 + 0, NULL, CIFS_READ_OP)) { 4404 + if (iocb->ki_flags & IOCB_DIRECT) 4405 + rc = netfs_unbuffered_read_iter(iocb, to); 4406 + else 4407 + rc = 
netfs_buffered_read_iter(iocb, to); 4408 + } 2924 4409 up_read(&cinode->lock_sem); 2925 4410 return rc; 2926 4411 } 2927 4412 2928 - static ssize_t 2929 - cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 2930 - { 2931 - int rc = -EACCES; 2932 - unsigned int bytes_read = 0; 2933 - unsigned int total_read; 2934 - unsigned int current_read_size; 2935 - unsigned int rsize; 2936 - struct cifs_sb_info *cifs_sb; 2937 - struct cifs_tcon *tcon; 2938 - struct TCP_Server_Info *server; 2939 - unsigned int xid; 2940 - char *cur_offset; 2941 - struct cifsFileInfo *open_file; 2942 - struct cifs_io_parms io_parms = {0}; 2943 - int buf_type = CIFS_NO_BUFFER; 2944 - __u32 pid; 2945 - 2946 - xid = get_xid(); 2947 - cifs_sb = CIFS_FILE_SB(file); 2948 - 2949 - /* FIXME: set up handlers for larger reads and/or convert to async */ 2950 - rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 2951 - 2952 - if (file->private_data == NULL) { 2953 - rc = -EBADF; 2954 - free_xid(xid); 2955 - return rc; 2956 - } 2957 - open_file = file->private_data; 2958 - tcon = tlink_tcon(open_file->tlink); 2959 - server = cifs_pick_channel(tcon->ses); 2960 - 2961 - if (!server->ops->sync_read) { 2962 - free_xid(xid); 2963 - return -ENOSYS; 2964 - } 2965 - 2966 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2967 - pid = open_file->pid; 2968 - else 2969 - pid = current->tgid; 2970 - 2971 - if ((file->f_flags & O_ACCMODE) == O_WRONLY) 2972 - cifs_dbg(FYI, "attempting read on write only file instance\n"); 2973 - 2974 - for (total_read = 0, cur_offset = read_data; read_size > total_read; 2975 - total_read += bytes_read, cur_offset += bytes_read) { 2976 - do { 2977 - current_read_size = min_t(uint, read_size - total_read, 2978 - rsize); 2979 - /* 2980 - * For windows me and 9x we do not want to request more 2981 - * than it negotiated since it will refuse the read 2982 - * then. 
2983 - */ 2984 - if (!(tcon->ses->capabilities & 2985 - tcon->ses->server->vals->cap_large_files)) { 2986 - current_read_size = min_t(uint, 2987 - current_read_size, CIFSMaxBufSize); 2988 - } 2989 - if (open_file->invalidHandle) { 2990 - rc = cifs_reopen_file(open_file, true); 2991 - if (rc != 0) 2992 - break; 2993 - } 2994 - io_parms.pid = pid; 2995 - io_parms.tcon = tcon; 2996 - io_parms.offset = *offset; 2997 - io_parms.length = current_read_size; 2998 - io_parms.server = server; 2999 - rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 3000 - &bytes_read, &cur_offset, 3001 - &buf_type); 3002 - } while (rc == -EAGAIN); 3003 - 3004 - if (rc || (bytes_read == 0)) { 3005 - if (total_read) { 3006 - break; 3007 - } else { 3008 - free_xid(xid); 3009 - return rc; 3010 - } 3011 - } else { 3012 - cifs_stats_bytes_read(tcon, total_read); 3013 - *offset += bytes_read; 3014 - } 3015 - } 3016 - free_xid(xid); 3017 - return total_read; 3018 - } 3019 - 3020 - /* 3021 - * If the page is mmap'ed into a process' page tables, then we need to make 3022 - * sure that it doesn't change while being written back. 3023 - */ 3024 4413 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 3025 4414 { 3026 - struct folio *folio = page_folio(vmf->page); 3027 - 3028 - /* Wait for the folio to be written to the cache before we allow it to 3029 - * be modified. We then assume the entire folio will need writing back. 
3030 - */ 3031 - #ifdef CONFIG_CIFS_FSCACHE 3032 - if (folio_test_fscache(folio) && 3033 - folio_wait_fscache_killable(folio) < 0) 3034 - return VM_FAULT_RETRY; 3035 - #endif 3036 - 3037 - folio_wait_writeback(folio); 3038 - 3039 - if (folio_lock_killable(folio) < 0) 3040 - return VM_FAULT_RETRY; 3041 - return VM_FAULT_LOCKED; 4415 + return netfs_page_mkwrite(vmf, NULL); 3042 4416 } 3043 4417 3044 4418 static const struct vm_operations_struct cifs_file_vm_ops = { ··· 2972 4560 rc = generic_file_mmap(file, vma); 2973 4561 if (!rc) 2974 4562 vma->vm_ops = &cifs_file_vm_ops; 2975 - 2976 - free_xid(xid); 2977 - return rc; 2978 - } 2979 - 2980 - /* 2981 - * Unlock a bunch of folios in the pagecache. 2982 - */ 2983 - static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 2984 - { 2985 - struct folio *folio; 2986 - XA_STATE(xas, &mapping->i_pages, first); 2987 - 2988 - rcu_read_lock(); 2989 - xas_for_each(&xas, folio, last) { 2990 - folio_unlock(folio); 2991 - } 2992 - rcu_read_unlock(); 2993 - } 2994 - 2995 - static void cifs_readahead_complete(struct work_struct *work) 2996 - { 2997 - struct cifs_readdata *rdata = container_of(work, 2998 - struct cifs_readdata, work); 2999 - struct folio *folio; 3000 - pgoff_t last; 3001 - bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 3002 - 3003 - XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 3004 - 3005 - if (good) 3006 - cifs_readahead_to_fscache(rdata->mapping->host, 3007 - rdata->offset, rdata->bytes); 3008 - 3009 - if (iov_iter_count(&rdata->iter) > 0) 3010 - iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 3011 - 3012 - last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 3013 - 3014 - rcu_read_lock(); 3015 - xas_for_each(&xas, folio, last) { 3016 - if (good) { 3017 - flush_dcache_folio(folio); 3018 - folio_mark_uptodate(folio); 3019 - } 3020 - folio_unlock(folio); 3021 - } 3022 - rcu_read_unlock(); 3023 - 3024 - 
kref_put(&rdata->refcount, cifs_readdata_release); 3025 - } 3026 - 3027 - static void cifs_readahead(struct readahead_control *ractl) 3028 - { 3029 - struct cifsFileInfo *open_file = ractl->file->private_data; 3030 - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 3031 - struct TCP_Server_Info *server; 3032 - unsigned int xid, nr_pages, cache_nr_pages = 0; 3033 - unsigned int ra_pages; 3034 - pgoff_t next_cached = ULONG_MAX, ra_index; 3035 - bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 3036 - cifs_inode_cookie(ractl->mapping->host)->cache_priv; 3037 - bool check_cache = caching; 3038 - pid_t pid; 3039 - int rc = 0; 3040 - 3041 - /* Note that readahead_count() lags behind our dequeuing of pages from 3042 - * the ractl, wo we have to keep track for ourselves. 3043 - */ 3044 - ra_pages = readahead_count(ractl); 3045 - ra_index = readahead_index(ractl); 3046 - 3047 - xid = get_xid(); 3048 - 3049 - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3050 - pid = open_file->pid; 3051 - else 3052 - pid = current->tgid; 3053 - 3054 - server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3055 - 3056 - cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 3057 - __func__, ractl->file, ractl->mapping, ra_pages); 3058 - 3059 - /* 3060 - * Chop the readahead request up into rsize-sized read requests. 3061 - */ 3062 - while ((nr_pages = ra_pages)) { 3063 - unsigned int i, rsize; 3064 - struct cifs_readdata *rdata; 3065 - struct cifs_credits credits_on_stack; 3066 - struct cifs_credits *credits = &credits_on_stack; 3067 - struct folio *folio; 3068 - pgoff_t fsize; 3069 - 3070 - /* 3071 - * Find out if we have anything cached in the range of 3072 - * interest, and if so, where the next chunk of cached data is. 
3073 - */ 3074 - if (caching) { 3075 - if (check_cache) { 3076 - rc = cifs_fscache_query_occupancy( 3077 - ractl->mapping->host, ra_index, nr_pages, 3078 - &next_cached, &cache_nr_pages); 3079 - if (rc < 0) 3080 - caching = false; 3081 - check_cache = false; 3082 - } 3083 - 3084 - if (ra_index == next_cached) { 3085 - /* 3086 - * TODO: Send a whole batch of pages to be read 3087 - * by the cache. 3088 - */ 3089 - folio = readahead_folio(ractl); 3090 - fsize = folio_nr_pages(folio); 3091 - ra_pages -= fsize; 3092 - ra_index += fsize; 3093 - if (cifs_readpage_from_fscache(ractl->mapping->host, 3094 - &folio->page) < 0) { 3095 - /* 3096 - * TODO: Deal with cache read failure 3097 - * here, but for the moment, delegate 3098 - * that to readpage. 3099 - */ 3100 - caching = false; 3101 - } 3102 - folio_unlock(folio); 3103 - next_cached += fsize; 3104 - cache_nr_pages -= fsize; 3105 - if (cache_nr_pages == 0) 3106 - check_cache = true; 3107 - continue; 3108 - } 3109 - } 3110 - 3111 - if (open_file->invalidHandle) { 3112 - rc = cifs_reopen_file(open_file, true); 3113 - if (rc) { 3114 - if (rc == -EAGAIN) 3115 - continue; 3116 - break; 3117 - } 3118 - } 3119 - 3120 - if (cifs_sb->ctx->rsize == 0) 3121 - cifs_sb->ctx->rsize = 3122 - server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 3123 - cifs_sb->ctx); 3124 - 3125 - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 3126 - &rsize, credits); 3127 - if (rc) 3128 - break; 3129 - nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 3130 - if (next_cached != ULONG_MAX) 3131 - nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 3132 - 3133 - /* 3134 - * Give up immediately if rsize is too small to read an entire 3135 - * page. The VFS will fall back to readpage. We should never 3136 - * reach this point however since we set ra_pages to 0 when the 3137 - * rsize is smaller than a cache page. 
3138 - */ 3139 - if (unlikely(!nr_pages)) { 3140 - add_credits_and_wake_if(server, credits, 0); 3141 - break; 3142 - } 3143 - 3144 - rdata = cifs_readdata_alloc(cifs_readahead_complete); 3145 - if (!rdata) { 3146 - /* best to give up if we're out of mem */ 3147 - add_credits_and_wake_if(server, credits, 0); 3148 - break; 3149 - } 3150 - 3151 - rdata->offset = ra_index * PAGE_SIZE; 3152 - rdata->bytes = nr_pages * PAGE_SIZE; 3153 - rdata->cfile = cifsFileInfo_get(open_file); 3154 - rdata->server = server; 3155 - rdata->mapping = ractl->mapping; 3156 - rdata->pid = pid; 3157 - rdata->credits = credits_on_stack; 3158 - 3159 - for (i = 0; i < nr_pages; i++) { 3160 - if (!readahead_folio(ractl)) 3161 - WARN_ON(1); 3162 - } 3163 - ra_pages -= nr_pages; 3164 - ra_index += nr_pages; 3165 - 3166 - iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 3167 - rdata->offset, rdata->bytes); 3168 - 3169 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3170 - if (!rc) { 3171 - if (rdata->cfile->invalidHandle) 3172 - rc = -EAGAIN; 3173 - else 3174 - rc = server->ops->async_readv(rdata); 3175 - } 3176 - 3177 - if (rc) { 3178 - add_credits_and_wake_if(server, &rdata->credits, 0); 3179 - cifs_unlock_folios(rdata->mapping, 3180 - rdata->offset / PAGE_SIZE, 3181 - (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 3182 - /* Fallback to the readpage in error/reconnect cases */ 3183 - kref_put(&rdata->refcount, cifs_readdata_release); 3184 - break; 3185 - } 3186 - 3187 - kref_put(&rdata->refcount, cifs_readdata_release); 3188 - } 3189 - 3190 - free_xid(xid); 3191 - } 3192 - 3193 - /* 3194 - * cifs_readpage_worker must be called with the page pinned 3195 - */ 3196 - static int cifs_readpage_worker(struct file *file, struct page *page, 3197 - loff_t *poffset) 3198 - { 3199 - struct inode *inode = file_inode(file); 3200 - struct timespec64 atime, mtime; 3201 - char *read_data; 3202 - int rc; 3203 - 3204 - /* Is the page cached? 
*/ 3205 - rc = cifs_readpage_from_fscache(inode, page); 3206 - if (rc == 0) 3207 - goto read_complete; 3208 - 3209 - read_data = kmap(page); 3210 - /* for reads over a certain size could initiate async read ahead */ 3211 - 3212 - rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 3213 - 3214 - if (rc < 0) 3215 - goto io_error; 3216 - else 3217 - cifs_dbg(FYI, "Bytes read %d\n", rc); 3218 - 3219 - /* we do not want atime to be less than mtime, it broke some apps */ 3220 - atime = inode_set_atime_to_ts(inode, current_time(inode)); 3221 - mtime = inode_get_mtime(inode); 3222 - if (timespec64_compare(&atime, &mtime) < 0) 3223 - inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 3224 - 3225 - if (PAGE_SIZE > rc) 3226 - memset(read_data + rc, 0, PAGE_SIZE - rc); 3227 - 3228 - flush_dcache_page(page); 3229 - SetPageUptodate(page); 3230 - rc = 0; 3231 - 3232 - io_error: 3233 - kunmap(page); 3234 - 3235 - read_complete: 3236 - unlock_page(page); 3237 - return rc; 3238 - } 3239 - 3240 - static int cifs_read_folio(struct file *file, struct folio *folio) 3241 - { 3242 - struct page *page = &folio->page; 3243 - loff_t offset = page_file_offset(page); 3244 - int rc = -EACCES; 3245 - unsigned int xid; 3246 - 3247 - xid = get_xid(); 3248 - 3249 - if (file->private_data == NULL) { 3250 - rc = -EBADF; 3251 - free_xid(xid); 3252 - return rc; 3253 - } 3254 - 3255 - cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 3256 - page, (int)offset, (int)offset); 3257 - 3258 - rc = cifs_readpage_worker(file, page, &offset); 3259 4563 3260 4564 free_xid(xid); 3261 4565 return rc; ··· 3022 4894 return false; 3023 4895 } else 3024 4896 return true; 3025 - } 3026 - 3027 - static int cifs_write_begin(struct file *file, struct address_space *mapping, 3028 - loff_t pos, unsigned len, 3029 - struct page **pagep, void **fsdata) 3030 - { 3031 - int oncethru = 0; 3032 - pgoff_t index = pos >> PAGE_SHIFT; 3033 - loff_t offset = pos & (PAGE_SIZE - 1); 3034 - loff_t page_start = pos & PAGE_MASK; 3035 - 
loff_t i_size; 3036 - struct page *page; 3037 - int rc = 0; 3038 - 3039 - cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 3040 - 3041 - start: 3042 - page = grab_cache_page_write_begin(mapping, index); 3043 - if (!page) { 3044 - rc = -ENOMEM; 3045 - goto out; 3046 - } 3047 - 3048 - if (PageUptodate(page)) 3049 - goto out; 3050 - 3051 - /* 3052 - * If we write a full page it will be up to date, no need to read from 3053 - * the server. If the write is short, we'll end up doing a sync write 3054 - * instead. 3055 - */ 3056 - if (len == PAGE_SIZE) 3057 - goto out; 3058 - 3059 - /* 3060 - * optimize away the read when we have an oplock, and we're not 3061 - * expecting to use any of the data we'd be reading in. That 3062 - * is, when the page lies beyond the EOF, or straddles the EOF 3063 - * and the write will cover all of the existing data. 3064 - */ 3065 - if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 3066 - i_size = i_size_read(mapping->host); 3067 - if (page_start >= i_size || 3068 - (offset == 0 && (pos + len) >= i_size)) { 3069 - zero_user_segments(page, 0, offset, 3070 - offset + len, 3071 - PAGE_SIZE); 3072 - /* 3073 - * PageChecked means that the parts of the page 3074 - * to which we're not writing are considered up 3075 - * to date. Once the data is copied to the 3076 - * page, it can be set uptodate. 3077 - */ 3078 - SetPageChecked(page); 3079 - goto out; 3080 - } 3081 - } 3082 - 3083 - if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 3084 - /* 3085 - * might as well read a page, it is fast enough. If we get 3086 - * an error, we don't need to return it. cifs_write_end will 3087 - * do a sync write instead since PG_uptodate isn't set. 
3088 - */ 3089 - cifs_readpage_worker(file, page, &page_start); 3090 - put_page(page); 3091 - oncethru = 1; 3092 - goto start; 3093 - } else { 3094 - /* we could try using another file handle if there is one - 3095 - but how would we lock it to prevent close of that handle 3096 - racing with this read? In any case 3097 - this will be written out by write_end so is fine */ 3098 - } 3099 - out: 3100 - *pagep = page; 3101 - return rc; 3102 - } 3103 - 3104 - static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 3105 - { 3106 - if (folio_test_private(folio)) 3107 - return 0; 3108 - if (folio_test_fscache(folio)) { 3109 - if (current_is_kswapd() || !(gfp & __GFP_FS)) 3110 - return false; 3111 - folio_wait_fscache(folio); 3112 - } 3113 - fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 3114 - return true; 3115 - } 3116 - 3117 - static void cifs_invalidate_folio(struct folio *folio, size_t offset, 3118 - size_t length) 3119 - { 3120 - folio_wait_fscache(folio); 3121 - } 3122 - 3123 - static int cifs_launder_folio(struct folio *folio) 3124 - { 3125 - int rc = 0; 3126 - loff_t range_start = folio_pos(folio); 3127 - loff_t range_end = range_start + folio_size(folio); 3128 - struct writeback_control wbc = { 3129 - .sync_mode = WB_SYNC_ALL, 3130 - .nr_to_write = 0, 3131 - .range_start = range_start, 3132 - .range_end = range_end, 3133 - }; 3134 - 3135 - cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 3136 - 3137 - if (folio_clear_dirty_for_io(folio)) 3138 - rc = cifs_writepage_locked(&folio->page, &wbc); 3139 - 3140 - folio_wait_fscache(folio); 3141 - return rc; 3142 4897 } 3143 4898 3144 4899 void cifs_oplock_break(struct work_struct *work) ··· 3113 5102 cifs_done_oplock_break(cinode); 3114 5103 } 3115 5104 3116 - /* 3117 - * The presence of cifs_direct_io() in the address space ops vector 3118 - * allowes open() O_DIRECT flags which would have failed otherwise. 
3119 - * 3120 - * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 3121 - * so this method should never be called. 3122 - * 3123 - * Direct IO is not yet supported in the cached mode. 3124 - */ 3125 - static ssize_t 3126 - cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 3127 - { 3128 - /* 3129 - * FIXME 3130 - * Eventually need to support direct IO for non forcedirectio mounts 3131 - */ 3132 - return -EINVAL; 3133 - } 3134 - 3135 5105 static int cifs_swap_activate(struct swap_info_struct *sis, 3136 5106 struct file *swap_file, sector_t *span) 3137 5107 { ··· 3174 5182 } 3175 5183 3176 5184 const struct address_space_operations cifs_addr_ops = { 3177 - .read_folio = cifs_read_folio, 3178 - .readahead = cifs_readahead, 3179 - .writepages = cifs_writepages, 3180 - .write_begin = cifs_write_begin, 3181 - .write_end = cifs_write_end, 3182 - .dirty_folio = netfs_dirty_folio, 3183 - .release_folio = cifs_release_folio, 3184 - .direct_IO = cifs_direct_io, 3185 - .invalidate_folio = cifs_invalidate_folio, 3186 - .launder_folio = cifs_launder_folio, 3187 - .migrate_folio = filemap_migrate_folio, 5185 + .read_folio = netfs_read_folio, 5186 + .readahead = netfs_readahead, 5187 + .writepages = netfs_writepages, 5188 + .dirty_folio = netfs_dirty_folio, 5189 + .release_folio = netfs_release_folio, 5190 + .direct_IO = noop_direct_IO, 5191 + .invalidate_folio = netfs_invalidate_folio, 5192 + .migrate_folio = filemap_migrate_folio, 3188 5193 /* 3189 5194 * TODO: investigate and if useful we could add an is_dirty_writeback 3190 5195 * helper if needed 3191 5196 */ 3192 - .swap_activate = cifs_swap_activate, 5197 + .swap_activate = cifs_swap_activate, 3193 5198 .swap_deactivate = cifs_swap_deactivate, 3194 5199 }; 3195 5200 ··· 3196 5207 * to leave cifs_readahead out of the address space operations. 
3197 5208 */ 3198 5209 const struct address_space_operations cifs_addr_ops_smallbuf = { 3199 - .read_folio = cifs_read_folio, 3200 - .writepages = cifs_writepages, 3201 - .write_begin = cifs_write_begin, 3202 - .write_end = cifs_write_end, 3203 - .dirty_folio = netfs_dirty_folio, 3204 - .release_folio = cifs_release_folio, 3205 - .invalidate_folio = cifs_invalidate_folio, 3206 - .launder_folio = cifs_launder_folio, 3207 - .migrate_folio = filemap_migrate_folio, 5210 + .read_folio = netfs_read_folio, 5211 + .writepages = netfs_writepages, 5212 + .dirty_folio = netfs_dirty_folio, 5213 + .release_folio = netfs_release_folio, 5214 + .invalidate_folio = netfs_invalidate_folio, 5215 + .migrate_folio = filemap_migrate_folio, 3208 5216 };

-109

fs/smb/client/fscache.c

··· 170 170 cifsi->netfs.cache = NULL; 171 171 } 172 172 } 173 - 174 - /* 175 - * Fallback page reading interface. 176 - */ 177 - static int fscache_fallback_read_page(struct inode *inode, struct page *page) 178 - { 179 - struct netfs_cache_resources cres; 180 - struct fscache_cookie *cookie = cifs_inode_cookie(inode); 181 - struct iov_iter iter; 182 - struct bio_vec bvec; 183 - int ret; 184 - 185 - memset(&cres, 0, sizeof(cres)); 186 - bvec_set_page(&bvec, page, PAGE_SIZE, 0); 187 - iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE); 188 - 189 - ret = fscache_begin_read_operation(&cres, cookie); 190 - if (ret < 0) 191 - return ret; 192 - 193 - ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL, 194 - NULL, NULL); 195 - fscache_end_operation(&cres); 196 - return ret; 197 - } 198 - 199 - /* 200 - * Fallback page writing interface. 201 - */ 202 - static int fscache_fallback_write_pages(struct inode *inode, loff_t start, size_t len, 203 - bool no_space_allocated_yet) 204 - { 205 - struct netfs_cache_resources cres; 206 - struct fscache_cookie *cookie = cifs_inode_cookie(inode); 207 - struct iov_iter iter; 208 - int ret; 209 - 210 - memset(&cres, 0, sizeof(cres)); 211 - iov_iter_xarray(&iter, ITER_SOURCE, &inode->i_mapping->i_pages, start, len); 212 - 213 - ret = fscache_begin_write_operation(&cres, cookie); 214 - if (ret < 0) 215 - return ret; 216 - 217 - ret = cres.ops->prepare_write(&cres, &start, &len, len, i_size_read(inode), 218 - no_space_allocated_yet); 219 - if (ret == 0) 220 - ret = fscache_write(&cres, start, &iter, NULL, NULL); 221 - fscache_end_operation(&cres); 222 - return ret; 223 - } 224 - 225 - /* 226 - * Retrieve a page from FS-Cache 227 - */ 228 - int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) 229 - { 230 - int ret; 231 - 232 - cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n", 233 - __func__, cifs_inode_cookie(inode), page, inode); 234 - 235 - ret = fscache_fallback_read_page(inode, page); 236 - if (ret < 
0) 237 - return ret; 238 - 239 - /* Read completed synchronously */ 240 - SetPageUptodate(page); 241 - return 0; 242 - } 243 - 244 - void __cifs_readahead_to_fscache(struct inode *inode, loff_t pos, size_t len) 245 - { 246 - cifs_dbg(FYI, "%s: (fsc: %p, p: %llx, l: %zx, i: %p)\n", 247 - __func__, cifs_inode_cookie(inode), pos, len, inode); 248 - 249 - fscache_fallback_write_pages(inode, pos, len, true); 250 - } 251 - 252 - /* 253 - * Query the cache occupancy. 254 - */ 255 - int __cifs_fscache_query_occupancy(struct inode *inode, 256 - pgoff_t first, unsigned int nr_pages, 257 - pgoff_t *_data_first, 258 - unsigned int *_data_nr_pages) 259 - { 260 - struct netfs_cache_resources cres; 261 - struct fscache_cookie *cookie = cifs_inode_cookie(inode); 262 - loff_t start, data_start; 263 - size_t len, data_len; 264 - int ret; 265 - 266 - ret = fscache_begin_read_operation(&cres, cookie); 267 - if (ret < 0) 268 - return ret; 269 - 270 - start = first * PAGE_SIZE; 271 - len = nr_pages * PAGE_SIZE; 272 - ret = cres.ops->query_occupancy(&cres, start, len, PAGE_SIZE, 273 - &data_start, &data_len); 274 - if (ret == 0) { 275 - *_data_first = data_start / PAGE_SIZE; 276 - *_data_nr_pages = len / PAGE_SIZE; 277 - } 278 - 279 - fscache_end_operation(&cres); 280 - return ret; 281 - }

-54

fs/smb/client/fscache.h

··· 74 74 i_size_read(inode), flags); 75 75 } 76 76 77 - extern int __cifs_fscache_query_occupancy(struct inode *inode, 78 - pgoff_t first, unsigned int nr_pages, 79 - pgoff_t *_data_first, 80 - unsigned int *_data_nr_pages); 81 - 82 - static inline int cifs_fscache_query_occupancy(struct inode *inode, 83 - pgoff_t first, unsigned int nr_pages, 84 - pgoff_t *_data_first, 85 - unsigned int *_data_nr_pages) 86 - { 87 - if (!cifs_inode_cookie(inode)) 88 - return -ENOBUFS; 89 - return __cifs_fscache_query_occupancy(inode, first, nr_pages, 90 - _data_first, _data_nr_pages); 91 - } 92 - 93 - extern int __cifs_readpage_from_fscache(struct inode *pinode, struct page *ppage); 94 - extern void __cifs_readahead_to_fscache(struct inode *pinode, loff_t pos, size_t len); 95 - 96 - 97 - static inline int cifs_readpage_from_fscache(struct inode *inode, 98 - struct page *page) 99 - { 100 - if (cifs_inode_cookie(inode)) 101 - return __cifs_readpage_from_fscache(inode, page); 102 - return -ENOBUFS; 103 - } 104 - 105 - static inline void cifs_readahead_to_fscache(struct inode *inode, 106 - loff_t pos, size_t len) 107 - { 108 - if (cifs_inode_cookie(inode)) 109 - __cifs_readahead_to_fscache(inode, pos, len); 110 - } 111 - 112 77 static inline bool cifs_fscache_enabled(struct inode *inode) 113 78 { 114 79 return fscache_cookie_enabled(cifs_inode_cookie(inode)); ··· 95 130 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } 96 131 static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} 97 132 static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } 98 - 99 - static inline int cifs_fscache_query_occupancy(struct inode *inode, 100 - pgoff_t first, unsigned int nr_pages, 101 - pgoff_t *_data_first, 102 - unsigned int *_data_nr_pages) 103 - { 104 - *_data_first = ULONG_MAX; 105 - *_data_nr_pages = 0; 106 - return -ENOBUFS; 107 - } 108 - 109 - static inline int 110 - cifs_readpage_from_fscache(struct 
inode *inode, struct page *page) 111 - { 112 - return -ENOBUFS; 113 - } 114 - 115 - static inline 116 - void cifs_readahead_to_fscache(struct inode *inode, loff_t pos, size_t len) {} 117 133 118 134 #endif /* CONFIG_CIFS_FSCACHE */ 119 135

+24 -21

fs/smb/client/inode.c

··· 28 28 #include "cached_dir.h" 29 29 #include "reparse.h" 30 30 31 + /* 32 + * Set parameters for the netfs library 33 + */ 34 + static void cifs_set_netfs_context(struct inode *inode) 35 + { 36 + struct cifsInodeInfo *cifs_i = CIFS_I(inode); 37 + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 38 + 39 + netfs_inode_init(&cifs_i->netfs, &cifs_req_ops, true); 40 + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 41 + __set_bit(NETFS_ICTX_WRITETHROUGH, &cifs_i->netfs.flags); 42 + } 43 + 31 44 static void cifs_set_ops(struct inode *inode) 32 45 { 33 46 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 47 + struct netfs_inode *ictx = netfs_inode(inode); 34 48 35 49 switch (inode->i_mode & S_IFMT) { 36 50 case S_IFREG: 37 51 inode->i_op = &cifs_file_inode_ops; 38 52 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 53 + set_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags); 39 54 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 40 55 inode->i_fop = &cifs_file_direct_nobrl_ops; 41 56 else ··· 72 57 inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 73 58 else 74 59 inode->i_data.a_ops = &cifs_addr_ops; 60 + mapping_set_large_folios(inode->i_mapping); 75 61 break; 76 62 case S_IFDIR: 77 63 if (IS_AUTOMOUNT(inode)) { ··· 237 221 238 222 if (fattr->cf_flags & CIFS_FATTR_JUNCTION) 239 223 inode->i_flags |= S_AUTOMOUNT; 240 - if (inode->i_state & I_NEW) 224 + if (inode->i_state & I_NEW) { 225 + cifs_set_netfs_context(inode); 241 226 cifs_set_ops(inode); 227 + } 242 228 return 0; 243 229 } 244 230 ··· 2449 2431 return false; 2450 2432 } 2451 2433 2452 - /* 2453 - * Zap the cache. Called when invalid_mapping flag is set. 
2454 - */ 2455 - int 2456 - cifs_invalidate_mapping(struct inode *inode) 2457 - { 2458 - int rc = 0; 2459 - 2460 - if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 2461 - rc = invalidate_inode_pages2(inode->i_mapping); 2462 - if (rc) 2463 - cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n", 2464 - __func__, inode, rc); 2465 - } 2466 - 2467 - return rc; 2468 - } 2469 - 2470 2434 /** 2471 2435 * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks 2472 2436 * ··· 2485 2485 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) 2486 2486 goto skip_invalidate; 2487 2487 2488 - rc = cifs_invalidate_mapping(inode); 2489 - if (rc) 2488 + rc = filemap_invalidate_inode(inode, true, 0, LLONG_MAX); 2489 + if (rc) { 2490 + cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n", 2491 + __func__, inode, rc); 2490 2492 set_bit(CIFS_INO_INVALID_MAPPING, flags); 2493 + } 2491 2494 } 2492 2495 2493 2496 skip_invalidate:

+5 -5

fs/smb/client/smb2ops.c

··· 217 217 } 218 218 219 219 static int 220 - smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, 221 - unsigned int *num, struct cifs_credits *credits) 220 + smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, 221 + size_t *num, struct cifs_credits *credits) 222 222 { 223 223 int rc = 0; 224 224 unsigned int scredits, in_flight; ··· 4490 4490 unsigned int cur_off; 4491 4491 unsigned int cur_page_idx; 4492 4492 unsigned int pad_len; 4493 - struct cifs_readdata *rdata = mid->callback_data; 4493 + struct cifs_io_subrequest *rdata = mid->callback_data; 4494 4494 struct smb2_hdr *shdr = (struct smb2_hdr *)buf; 4495 4495 int length; 4496 4496 bool use_rdma_mr = false; ··· 4592 4592 4593 4593 /* Copy the data to the output I/O iterator. */ 4594 4594 rdata->result = cifs_copy_pages_to_iter(pages, pages_len, 4595 - cur_off, &rdata->iter); 4595 + cur_off, &rdata->subreq.io_iter); 4596 4596 if (rdata->result != 0) { 4597 4597 if (is_offloaded) 4598 4598 mid->mid_state = MID_RESPONSE_MALFORMED; ··· 4606 4606 /* read response payload is in buf */ 4607 4607 WARN_ONCE(pages && !xa_empty(pages), 4608 4608 "read data can be either in buf or in pages"); 4609 - length = copy_to_iter(buf + data_offset, data_len, &rdata->iter); 4609 + length = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter); 4610 4610 if (length < 0) 4611 4611 return length; 4612 4612 rdata->got_bytes = data_len;

+105 -81

fs/smb/client/smb2pdu.c

··· 23 23 #include <linux/uuid.h> 24 24 #include <linux/pagemap.h> 25 25 #include <linux/xattr.h> 26 + #include <linux/netfs.h> 27 + #include <trace/events/netfs.h> 26 28 #include "cifsglob.h" 27 29 #include "cifsacl.h" 28 30 #include "cifsproto.h" ··· 4393 4391 */ 4394 4392 static int 4395 4393 smb2_new_read_req(void **buf, unsigned int *total_len, 4396 - struct cifs_io_parms *io_parms, struct cifs_readdata *rdata, 4394 + struct cifs_io_parms *io_parms, struct cifs_io_subrequest *rdata, 4397 4395 unsigned int remaining_bytes, int request_type) 4398 4396 { 4399 4397 int rc = -EACCES; ··· 4421 4419 req->Length = cpu_to_le32(io_parms->length); 4422 4420 req->Offset = cpu_to_le64(io_parms->offset); 4423 4421 4424 - trace_smb3_read_enter(0 /* xid */, 4425 - io_parms->persistent_fid, 4426 - io_parms->tcon->tid, io_parms->tcon->ses->Suid, 4427 - io_parms->offset, io_parms->length); 4422 + trace_smb3_read_enter(rdata ? rdata->rreq->debug_id : 0, 4423 + rdata ? rdata->subreq.debug_index : 0, 4424 + rdata ? 
rdata->xid : 0, 4425 + io_parms->persistent_fid, 4426 + io_parms->tcon->tid, io_parms->tcon->ses->Suid, 4427 + io_parms->offset, io_parms->length); 4428 4428 #ifdef CONFIG_CIFS_SMB_DIRECT 4429 4429 /* 4430 4430 * If we want to do a RDMA write, fill in and append ··· 4436 4432 struct smbd_buffer_descriptor_v1 *v1; 4437 4433 bool need_invalidate = server->dialect == SMB30_PROT_ID; 4438 4434 4439 - rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->iter, 4435 + rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->subreq.io_iter, 4440 4436 true, need_invalidate); 4441 4437 if (!rdata->mr) 4442 4438 return -EAGAIN; ··· 4487 4483 static void 4488 4484 smb2_readv_callback(struct mid_q_entry *mid) 4489 4485 { 4490 - struct cifs_readdata *rdata = mid->callback_data; 4491 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 4486 + struct cifs_io_subrequest *rdata = mid->callback_data; 4487 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 4492 4488 struct TCP_Server_Info *server = rdata->server; 4493 4489 struct smb2_hdr *shdr = 4494 4490 (struct smb2_hdr *)rdata->iov[0].iov_base; ··· 4496 4492 struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], .rq_nvec = 1 }; 4497 4493 4498 4494 if (rdata->got_bytes) { 4499 - rqst.rq_iter = rdata->iter; 4500 - rqst.rq_iter_size = iov_iter_count(&rdata->iter); 4495 + rqst.rq_iter = rdata->subreq.io_iter; 4496 + rqst.rq_iter_size = iov_iter_count(&rdata->subreq.io_iter); 4501 4497 } 4502 4498 4503 4499 WARN_ONCE(rdata->server != mid->server, 4504 4500 "rdata server %p != mid server %p", 4505 4501 rdata->server, mid->server); 4506 4502 4507 - cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", 4503 + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", 4508 4504 __func__, mid->mid, mid->mid_state, rdata->result, 4509 - rdata->bytes); 4505 + rdata->subreq.len); 4510 4506 4511 4507 switch (mid->mid_state) { 4512 4508 case MID_RESPONSE_RECEIVED: ··· 4516 4512 if (server->sign && !mid->decrypted) { 
4517 4513 int rc; 4518 4514 4519 - iov_iter_revert(&rqst.rq_iter, rdata->got_bytes); 4520 4515 iov_iter_truncate(&rqst.rq_iter, rdata->got_bytes); 4521 4516 rc = smb2_verify_signature(&rqst, server); 4522 4517 if (rc) ··· 4556 4553 #endif 4557 4554 if (rdata->result && rdata->result != -ENODATA) { 4558 4555 cifs_stats_fail_inc(tcon, SMB2_READ_HE); 4559 - trace_smb3_read_err(0 /* xid */, 4560 - rdata->cfile->fid.persistent_fid, 4561 - tcon->tid, tcon->ses->Suid, rdata->offset, 4562 - rdata->bytes, rdata->result); 4556 + trace_smb3_read_err(rdata->rreq->debug_id, 4557 + rdata->subreq.debug_index, 4558 + rdata->xid, 4559 + rdata->req->cfile->fid.persistent_fid, 4560 + tcon->tid, tcon->ses->Suid, rdata->subreq.start, 4561 + rdata->subreq.len, rdata->result); 4563 4562 } else 4564 - trace_smb3_read_done(0 /* xid */, 4565 - rdata->cfile->fid.persistent_fid, 4563 + trace_smb3_read_done(rdata->rreq->debug_id, 4564 + rdata->subreq.debug_index, 4565 + rdata->xid, 4566 + rdata->req->cfile->fid.persistent_fid, 4566 4567 tcon->tid, tcon->ses->Suid, 4567 - rdata->offset, rdata->got_bytes); 4568 + rdata->subreq.start, rdata->got_bytes); 4568 4569 4569 - queue_work(cifsiod_wq, &rdata->work); 4570 + if (rdata->result == -ENODATA) { 4571 + /* We may have got an EOF error because fallocate 4572 + * failed to enlarge the file. 4573 + */ 4574 + if (rdata->subreq.start < rdata->subreq.rreq->i_size) 4575 + rdata->result = 0; 4576 + } 4577 + if (rdata->result == 0 || rdata->result == -EAGAIN) 4578 + iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes); 4579 + rdata->credits.value = 0; 4580 + netfs_subreq_terminated(&rdata->subreq, 4581 + (rdata->result == 0 || rdata->result == -EAGAIN) ? 
4582 + rdata->got_bytes : rdata->result, true); 4570 4583 release_mid(mid); 4571 4584 add_credits(server, &credits, 0); 4572 4585 } 4573 4586 4574 4587 /* smb2_async_readv - send an async read, and set up mid to handle result */ 4575 4588 int 4576 - smb2_async_readv(struct cifs_readdata *rdata) 4589 + smb2_async_readv(struct cifs_io_subrequest *rdata) 4577 4590 { 4578 4591 int rc, flags = 0; 4579 4592 char *buf; ··· 4598 4579 struct smb_rqst rqst = { .rq_iov = rdata->iov, 4599 4580 .rq_nvec = 1 }; 4600 4581 struct TCP_Server_Info *server; 4601 - struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 4582 + struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); 4602 4583 unsigned int total_len; 4603 4584 int credit_request; 4604 4585 4605 - cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", 4606 - __func__, rdata->offset, rdata->bytes); 4586 + cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n", 4587 + __func__, rdata->subreq.start, rdata->subreq.len); 4607 4588 4608 4589 if (!rdata->server) 4609 4590 rdata->server = cifs_pick_channel(tcon->ses); 4610 4591 4611 - io_parms.tcon = tlink_tcon(rdata->cfile->tlink); 4592 + io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink); 4612 4593 io_parms.server = server = rdata->server; 4613 - io_parms.offset = rdata->offset; 4614 - io_parms.length = rdata->bytes; 4615 - io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; 4616 - io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; 4594 + io_parms.offset = rdata->subreq.start; 4595 + io_parms.length = rdata->subreq.len; 4596 + io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; 4597 + io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; 4617 4598 io_parms.pid = rdata->pid; 4618 4599 4619 4600 rc = smb2_new_read_req( ··· 4630 4611 shdr = (struct smb2_hdr *)buf; 4631 4612 4632 4613 if (rdata->credits.value > 0) { 4633 - shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, 4614 + shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->subreq.len, 4634 
4615 SMB2_MAX_BUFFER_SIZE)); 4635 4616 credit_request = le16_to_cpu(shdr->CreditCharge) + 8; 4636 4617 if (server->credits >= server->max_credits) ··· 4640 4621 min_t(int, server->max_credits - 4641 4622 server->credits, credit_request)); 4642 4623 4643 - rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4624 + rc = adjust_credits(server, &rdata->credits, rdata->subreq.len); 4644 4625 if (rc) 4645 4626 goto async_readv_out; 4646 4627 4647 4628 flags |= CIFS_HAS_CREDITS; 4648 4629 } 4649 4630 4650 - kref_get(&rdata->refcount); 4651 4631 rc = cifs_call_async(server, &rqst, 4652 4632 cifs_readv_receive, smb2_readv_callback, 4653 4633 smb3_handle_read_data, rdata, flags, 4654 4634 &rdata->credits); 4655 4635 if (rc) { 4656 - kref_put(&rdata->refcount, cifs_readdata_release); 4657 4636 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); 4658 - trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid, 4637 + trace_smb3_read_err(rdata->rreq->debug_id, 4638 + rdata->subreq.debug_index, 4639 + rdata->xid, io_parms.persistent_fid, 4659 4640 io_parms.tcon->tid, 4660 4641 io_parms.tcon->ses->Suid, 4661 4642 io_parms.offset, io_parms.length, rc); ··· 4706 4687 if (rc != -ENODATA) { 4707 4688 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); 4708 4689 cifs_dbg(VFS, "Send error in read = %d\n", rc); 4709 - trace_smb3_read_err(xid, 4690 + trace_smb3_read_err(0, 0, xid, 4710 4691 req->PersistentFileId, 4711 4692 io_parms->tcon->tid, ses->Suid, 4712 4693 io_parms->offset, io_parms->length, 4713 4694 rc); 4714 4695 } else 4715 - trace_smb3_read_done(xid, req->PersistentFileId, io_parms->tcon->tid, 4696 + trace_smb3_read_done(0, 0, xid, 4697 + req->PersistentFileId, io_parms->tcon->tid, 4716 4698 ses->Suid, io_parms->offset, 0); 4717 4699 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 4718 4700 cifs_small_buf_release(req); 4719 4701 return rc == -ENODATA ? 
0 : rc; 4720 4702 } else 4721 - trace_smb3_read_done(xid, 4722 - req->PersistentFileId, 4723 - io_parms->tcon->tid, ses->Suid, 4724 - io_parms->offset, io_parms->length); 4703 + trace_smb3_read_done(0, 0, xid, 4704 + req->PersistentFileId, 4705 + io_parms->tcon->tid, ses->Suid, 4706 + io_parms->offset, io_parms->length); 4725 4707 4726 4708 cifs_small_buf_release(req); 4727 4709 ··· 4755 4735 static void 4756 4736 smb2_writev_callback(struct mid_q_entry *mid) 4757 4737 { 4758 - struct cifs_writedata *wdata = mid->callback_data; 4759 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 4738 + struct cifs_io_subrequest *wdata = mid->callback_data; 4739 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 4760 4740 struct TCP_Server_Info *server = wdata->server; 4761 - unsigned int written; 4762 4741 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; 4763 4742 struct cifs_credits credits = { .value = 0, .instance = 0 }; 4743 + ssize_t result = 0; 4744 + size_t written; 4764 4745 4765 4746 WARN_ONCE(wdata->server != mid->server, 4766 4747 "wdata server %p != mid server %p", ··· 4771 4750 case MID_RESPONSE_RECEIVED: 4772 4751 credits.value = le16_to_cpu(rsp->hdr.CreditRequest); 4773 4752 credits.instance = server->reconnect_instance; 4774 - wdata->result = smb2_check_receive(mid, server, 0); 4775 - if (wdata->result != 0) 4753 + result = smb2_check_receive(mid, server, 0); 4754 + if (result != 0) 4776 4755 break; 4777 4756 4778 4757 written = le32_to_cpu(rsp->DataLength); ··· 4782 4761 * client. OS/2 servers are known to set incorrect 4783 4762 * CountHigh values. 
4784 4763 */ 4785 - if (written > wdata->bytes) 4764 + if (written > wdata->subreq.len) 4786 4765 written &= 0xFFFF; 4787 4766 4788 - if (written < wdata->bytes) 4767 + if (written < wdata->subreq.len) 4789 4768 wdata->result = -ENOSPC; 4790 4769 else 4791 - wdata->bytes = written; 4770 + wdata->subreq.len = written; 4771 + iov_iter_advance(&wdata->subreq.io_iter, written); 4792 4772 break; 4793 4773 case MID_REQUEST_SUBMITTED: 4794 4774 case MID_RETRY_NEEDED: 4795 - wdata->result = -EAGAIN; 4775 + result = -EAGAIN; 4796 4776 break; 4797 4777 case MID_RESPONSE_MALFORMED: 4798 4778 credits.value = le16_to_cpu(rsp->hdr.CreditRequest); 4799 4779 credits.instance = server->reconnect_instance; 4800 4780 fallthrough; 4801 4781 default: 4802 - wdata->result = -EIO; 4782 + result = -EIO; 4803 4783 break; 4804 4784 } 4805 4785 #ifdef CONFIG_CIFS_SMB_DIRECT ··· 4816 4794 wdata->mr = NULL; 4817 4795 } 4818 4796 #endif 4819 - if (wdata->result) { 4797 + if (result) { 4820 4798 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 4821 - trace_smb3_write_err(0 /* no xid */, 4822 - wdata->cfile->fid.persistent_fid, 4823 - tcon->tid, tcon->ses->Suid, wdata->offset, 4824 - wdata->bytes, wdata->result); 4799 + trace_smb3_write_err(wdata->xid, 4800 + wdata->req->cfile->fid.persistent_fid, 4801 + tcon->tid, tcon->ses->Suid, wdata->subreq.start, 4802 + wdata->subreq.len, wdata->result); 4825 4803 if (wdata->result == -ENOSPC) 4826 4804 pr_warn_once("Out of space writing to %s\n", 4827 4805 tcon->tree_name); 4828 4806 } else 4829 4807 trace_smb3_write_done(0 /* no xid */, 4830 - wdata->cfile->fid.persistent_fid, 4808 + wdata->req->cfile->fid.persistent_fid, 4831 4809 tcon->tid, tcon->ses->Suid, 4832 - wdata->offset, wdata->bytes); 4810 + wdata->subreq.start, wdata->subreq.len); 4833 4811 4834 - queue_work(cifsiod_wq, &wdata->work); 4812 + wdata->credits.value = 0; 4813 + cifs_write_subrequest_terminated(wdata, result ?: written, true); 4835 4814 release_mid(mid); 4836 4815 add_credits(server, 
&credits, 0); 4837 4816 } 4838 4817 4839 4818 /* smb2_async_writev - send an async write, and set up mid to handle result */ 4840 - int 4841 - smb2_async_writev(struct cifs_writedata *wdata, 4842 - void (*release)(struct kref *kref)) 4819 + void 4820 + smb2_async_writev(struct cifs_io_subrequest *wdata) 4843 4821 { 4844 4822 int rc = -EACCES, flags = 0; 4845 4823 struct smb2_write_req *req = NULL; 4846 4824 struct smb2_hdr *shdr; 4847 - struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 4825 + struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); 4848 4826 struct TCP_Server_Info *server = wdata->server; 4849 4827 struct kvec iov[1]; 4850 4828 struct smb_rqst rqst = { }; 4851 - unsigned int total_len; 4829 + unsigned int total_len, xid = wdata->xid; 4852 4830 struct cifs_io_parms _io_parms; 4853 4831 struct cifs_io_parms *io_parms = NULL; 4854 4832 int credit_request; 4855 4833 4856 - if (!wdata->server || wdata->replay) 4834 + if (!wdata->server || test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) 4857 4835 server = wdata->server = cifs_pick_channel(tcon->ses); 4858 4836 4859 4837 /* ··· 4863 4841 _io_parms = (struct cifs_io_parms) { 4864 4842 .tcon = tcon, 4865 4843 .server = server, 4866 - .offset = wdata->offset, 4867 - .length = wdata->bytes, 4868 - .persistent_fid = wdata->cfile->fid.persistent_fid, 4869 - .volatile_fid = wdata->cfile->fid.volatile_fid, 4844 + .offset = wdata->subreq.start, 4845 + .length = wdata->subreq.len, 4846 + .persistent_fid = wdata->req->cfile->fid.persistent_fid, 4847 + .volatile_fid = wdata->req->cfile->fid.volatile_fid, 4870 4848 .pid = wdata->pid, 4871 4849 }; 4872 4850 io_parms = &_io_parms; ··· 4874 4852 rc = smb2_plain_req_init(SMB2_WRITE, tcon, server, 4875 4853 (void **) &req, &total_len); 4876 4854 if (rc) 4877 - return rc; 4855 + goto out; 4878 4856 4879 4857 if (smb3_encryption_required(tcon)) 4880 4858 flags |= CIFS_TRANSFORM_REQ; ··· 4892 4870 offsetof(struct smb2_write_req, Buffer)); 4893 4871 
req->RemainingBytes = 0; 4894 4872 4895 - trace_smb3_write_enter(0 /* xid */, 4873 + trace_smb3_write_enter(wdata->xid, 4896 4874 io_parms->persistent_fid, 4897 4875 io_parms->tcon->tid, 4898 4876 io_parms->tcon->ses->Suid, ··· 4906 4884 */ 4907 4885 if (smb3_use_rdma_offload(io_parms)) { 4908 4886 struct smbd_buffer_descriptor_v1 *v1; 4909 - size_t data_size = iov_iter_count(&wdata->iter); 4887 + size_t data_size = iov_iter_count(&wdata->subreq.io_iter); 4910 4888 bool need_invalidate = server->dialect == SMB30_PROT_ID; 4911 4889 4912 - wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->iter, 4890 + wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter, 4913 4891 false, need_invalidate); 4914 4892 if (!wdata->mr) { 4915 4893 rc = -EAGAIN; ··· 4936 4914 4937 4915 rqst.rq_iov = iov; 4938 4916 rqst.rq_nvec = 1; 4939 - rqst.rq_iter = wdata->iter; 4917 + rqst.rq_iter = wdata->subreq.io_iter; 4940 4918 rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter); 4941 - if (wdata->replay) 4919 + if (test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) 4942 4920 smb2_set_replay(server, &rqst); 4943 4921 #ifdef CONFIG_CIFS_SMB_DIRECT 4944 4922 if (wdata->mr) ··· 4956 4934 #endif 4957 4935 4958 4936 if (wdata->credits.value > 0) { 4959 - shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, 4937 + shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->subreq.len, 4960 4938 SMB2_MAX_BUFFER_SIZE)); 4961 4939 credit_request = le16_to_cpu(shdr->CreditCharge) + 8; 4962 4940 if (server->credits >= server->max_credits) ··· 4973 4951 flags |= CIFS_HAS_CREDITS; 4974 4952 } 4975 4953 4976 - kref_get(&wdata->refcount); 4977 4954 rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, NULL, 4978 4955 wdata, flags, &wdata->credits); 4979 - 4956 + /* Can't touch wdata if rc == 0 */ 4980 4957 if (rc) { 4981 - trace_smb3_write_err(0 /* no xid */, 4958 + trace_smb3_write_err(xid, 4982 4959 io_parms->persistent_fid, 4983 4960 io_parms->tcon->tid, 4984 4961 
io_parms->tcon->ses->Suid, 4985 4962 io_parms->offset, 4986 4963 io_parms->length, 4987 4964 rc); 4988 - kref_put(&wdata->refcount, release); 4989 4965 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 4990 4966 } 4991 4967 4992 4968 async_writev_out: 4993 4969 cifs_small_buf_release(req); 4994 - return rc; 4970 + out: 4971 + if (rc) { 4972 + add_credits_and_wake_if(wdata->server, &wdata->credits, 0); 4973 + cifs_write_subrequest_terminated(wdata, rc, true); 4974 + } 4995 4975 } 4996 4976 4997 4977 /*

+2 -3

fs/smb/client/smb2proto.h

··· 210 210 extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, 211 211 u64 persistent_fid, u64 volatile_fid, 212 212 __le64 *uniqueid); 213 - extern int smb2_async_readv(struct cifs_readdata *rdata); 213 + extern int smb2_async_readv(struct cifs_io_subrequest *rdata); 214 214 extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, 215 215 unsigned int *nbytes, char **buf, int *buf_type); 216 - extern int smb2_async_writev(struct cifs_writedata *wdata, 217 - void (*release)(struct kref *kref)); 216 + extern void smb2_async_writev(struct cifs_io_subrequest *wdata); 218 217 extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, 219 218 unsigned int *nbytes, struct kvec *iov, int n_vec); 220 219 extern int SMB2_echo(struct TCP_Server_Info *server);

+125 -19

fs/smb/client/trace.h

··· 85 85 86 86 /* For logging errors in read or write */ 87 87 DECLARE_EVENT_CLASS(smb3_rw_err_class, 88 + TP_PROTO(unsigned int rreq_debug_id, 89 + unsigned int rreq_debug_index, 90 + unsigned int xid, 91 + __u64 fid, 92 + __u32 tid, 93 + __u64 sesid, 94 + __u64 offset, 95 + __u32 len, 96 + int rc), 97 + TP_ARGS(rreq_debug_id, rreq_debug_index, 98 + xid, fid, tid, sesid, offset, len, rc), 99 + TP_STRUCT__entry( 100 + __field(unsigned int, rreq_debug_id) 101 + __field(unsigned int, rreq_debug_index) 102 + __field(unsigned int, xid) 103 + __field(__u64, fid) 104 + __field(__u32, tid) 105 + __field(__u64, sesid) 106 + __field(__u64, offset) 107 + __field(__u32, len) 108 + __field(int, rc) 109 + ), 110 + TP_fast_assign( 111 + __entry->rreq_debug_id = rreq_debug_id; 112 + __entry->rreq_debug_index = rreq_debug_index; 113 + __entry->xid = xid; 114 + __entry->fid = fid; 115 + __entry->tid = tid; 116 + __entry->sesid = sesid; 117 + __entry->offset = offset; 118 + __entry->len = len; 119 + __entry->rc = rc; 120 + ), 121 + TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", 122 + __entry->rreq_debug_id, __entry->rreq_debug_index, 123 + __entry->xid, __entry->sesid, __entry->tid, __entry->fid, 124 + __entry->offset, __entry->len, __entry->rc) 125 + ) 126 + 127 + #define DEFINE_SMB3_RW_ERR_EVENT(name) \ 128 + DEFINE_EVENT(smb3_rw_err_class, smb3_##name, \ 129 + TP_PROTO(unsigned int rreq_debug_id, \ 130 + unsigned int rreq_debug_index, \ 131 + unsigned int xid, \ 132 + __u64 fid, \ 133 + __u32 tid, \ 134 + __u64 sesid, \ 135 + __u64 offset, \ 136 + __u32 len, \ 137 + int rc), \ 138 + TP_ARGS(rreq_debug_id, rreq_debug_index, xid, fid, tid, sesid, offset, len, rc)) 139 + 140 + DEFINE_SMB3_RW_ERR_EVENT(read_err); 141 + 142 + /* For logging errors in other file I/O ops */ 143 + DECLARE_EVENT_CLASS(smb3_other_err_class, 88 144 TP_PROTO(unsigned int xid, 89 145 __u64 fid, 90 146 __u32 tid, ··· 172 116 __entry->offset, __entry->len, 
__entry->rc) 173 117 ) 174 118 175 - #define DEFINE_SMB3_RW_ERR_EVENT(name) \ 176 - DEFINE_EVENT(smb3_rw_err_class, smb3_##name, \ 119 + #define DEFINE_SMB3_OTHER_ERR_EVENT(name) \ 120 + DEFINE_EVENT(smb3_other_err_class, smb3_##name, \ 177 121 TP_PROTO(unsigned int xid, \ 178 122 __u64 fid, \ 179 123 __u32 tid, \ ··· 183 127 int rc), \ 184 128 TP_ARGS(xid, fid, tid, sesid, offset, len, rc)) 185 129 186 - DEFINE_SMB3_RW_ERR_EVENT(write_err); 187 - DEFINE_SMB3_RW_ERR_EVENT(read_err); 188 - DEFINE_SMB3_RW_ERR_EVENT(query_dir_err); 189 - DEFINE_SMB3_RW_ERR_EVENT(zero_err); 190 - DEFINE_SMB3_RW_ERR_EVENT(falloc_err); 130 + DEFINE_SMB3_OTHER_ERR_EVENT(write_err); 131 + DEFINE_SMB3_OTHER_ERR_EVENT(query_dir_err); 132 + DEFINE_SMB3_OTHER_ERR_EVENT(zero_err); 133 + DEFINE_SMB3_OTHER_ERR_EVENT(falloc_err); 191 134 192 135 193 136 /* For logging successful read or write */ 194 137 DECLARE_EVENT_CLASS(smb3_rw_done_class, 138 + TP_PROTO(unsigned int rreq_debug_id, 139 + unsigned int rreq_debug_index, 140 + unsigned int xid, 141 + __u64 fid, 142 + __u32 tid, 143 + __u64 sesid, 144 + __u64 offset, 145 + __u32 len), 146 + TP_ARGS(rreq_debug_id, rreq_debug_index, 147 + xid, fid, tid, sesid, offset, len), 148 + TP_STRUCT__entry( 149 + __field(unsigned int, rreq_debug_id) 150 + __field(unsigned int, rreq_debug_index) 151 + __field(unsigned int, xid) 152 + __field(__u64, fid) 153 + __field(__u32, tid) 154 + __field(__u64, sesid) 155 + __field(__u64, offset) 156 + __field(__u32, len) 157 + ), 158 + TP_fast_assign( 159 + __entry->rreq_debug_id = rreq_debug_id; 160 + __entry->rreq_debug_index = rreq_debug_index; 161 + __entry->xid = xid; 162 + __entry->fid = fid; 163 + __entry->tid = tid; 164 + __entry->sesid = sesid; 165 + __entry->offset = offset; 166 + __entry->len = len; 167 + ), 168 + TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x", 169 + __entry->rreq_debug_id, __entry->rreq_debug_index, 170 + __entry->xid, __entry->sesid, __entry->tid, 
__entry->fid, 171 + __entry->offset, __entry->len) 172 + ) 173 + 174 + #define DEFINE_SMB3_RW_DONE_EVENT(name) \ 175 + DEFINE_EVENT(smb3_rw_done_class, smb3_##name, \ 176 + TP_PROTO(unsigned int rreq_debug_id, \ 177 + unsigned int rreq_debug_index, \ 178 + unsigned int xid, \ 179 + __u64 fid, \ 180 + __u32 tid, \ 181 + __u64 sesid, \ 182 + __u64 offset, \ 183 + __u32 len), \ 184 + TP_ARGS(rreq_debug_id, rreq_debug_index, xid, fid, tid, sesid, offset, len)) 185 + 186 + DEFINE_SMB3_RW_DONE_EVENT(read_enter); 187 + DEFINE_SMB3_RW_DONE_EVENT(read_done); 188 + 189 + /* For logging successful other op */ 190 + DECLARE_EVENT_CLASS(smb3_other_done_class, 195 191 TP_PROTO(unsigned int xid, 196 192 __u64 fid, 197 193 __u32 tid, ··· 272 164 __entry->offset, __entry->len) 273 165 ) 274 166 275 - #define DEFINE_SMB3_RW_DONE_EVENT(name) \ 276 - DEFINE_EVENT(smb3_rw_done_class, smb3_##name, \ 167 + #define DEFINE_SMB3_OTHER_DONE_EVENT(name) \ 168 + DEFINE_EVENT(smb3_other_done_class, smb3_##name, \ 277 169 TP_PROTO(unsigned int xid, \ 278 170 __u64 fid, \ 279 171 __u32 tid, \ ··· 282 174 __u32 len), \ 283 175 TP_ARGS(xid, fid, tid, sesid, offset, len)) 284 176 285 - DEFINE_SMB3_RW_DONE_EVENT(write_enter); 286 - DEFINE_SMB3_RW_DONE_EVENT(read_enter); 287 - DEFINE_SMB3_RW_DONE_EVENT(query_dir_enter); 288 - DEFINE_SMB3_RW_DONE_EVENT(zero_enter); 289 - DEFINE_SMB3_RW_DONE_EVENT(falloc_enter); 290 - DEFINE_SMB3_RW_DONE_EVENT(write_done); 291 - DEFINE_SMB3_RW_DONE_EVENT(read_done); 292 - DEFINE_SMB3_RW_DONE_EVENT(query_dir_done); 293 - DEFINE_SMB3_RW_DONE_EVENT(zero_done); 294 - DEFINE_SMB3_RW_DONE_EVENT(falloc_done); 177 + DEFINE_SMB3_OTHER_DONE_EVENT(write_enter); 178 + DEFINE_SMB3_OTHER_DONE_EVENT(query_dir_enter); 179 + DEFINE_SMB3_OTHER_DONE_EVENT(zero_enter); 180 + DEFINE_SMB3_OTHER_DONE_EVENT(falloc_enter); 181 + DEFINE_SMB3_OTHER_DONE_EVENT(write_done); 182 + DEFINE_SMB3_OTHER_DONE_EVENT(query_dir_done); 183 + DEFINE_SMB3_OTHER_DONE_EVENT(zero_done); 184 + 
DEFINE_SMB3_OTHER_DONE_EVENT(falloc_done); 295 185 296 186 /* For logging successful set EOF (truncate) */ 297 187 DECLARE_EVENT_CLASS(smb3_eof_class,

+10 -7

fs/smb/client/transport.c

··· 691 691 } 692 692 693 693 int 694 - cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, 695 - unsigned int *num, struct cifs_credits *credits) 694 + cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, 695 + size_t *num, struct cifs_credits *credits) 696 696 { 697 697 *num = size; 698 698 credits->value = 0; ··· 1692 1692 static int 1693 1693 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1694 1694 { 1695 - struct cifs_readdata *rdata = mid->callback_data; 1695 + struct cifs_io_subrequest *rdata = mid->callback_data; 1696 1696 1697 1697 return __cifs_readv_discard(server, mid, rdata->result); 1698 1698 } ··· 1702 1702 { 1703 1703 int length, len; 1704 1704 unsigned int data_offset, data_len; 1705 - struct cifs_readdata *rdata = mid->callback_data; 1705 + struct cifs_io_subrequest *rdata = mid->callback_data; 1706 1706 char *buf = server->smallbuf; 1707 1707 unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server); 1708 1708 bool use_rdma_mr = false; 1709 1709 1710 - cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", 1711 - __func__, mid->mid, rdata->offset, rdata->bytes); 1710 + cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n", 1711 + __func__, mid->mid, rdata->subreq.start, rdata->subreq.len); 1712 1712 1713 1713 /* 1714 1714 * read the rest of READ_RSP header (sans Data array), or whatever we ··· 1813 1813 length = data_len; /* An RDMA read is already done. */ 1814 1814 else 1815 1815 #endif 1816 - length = cifs_read_iter_from_socket(server, &rdata->iter, 1816 + { 1817 + length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter, 1817 1818 data_len); 1819 + iov_iter_revert(&rdata->subreq.io_iter, data_len); 1820 + } 1818 1821 if (length > 0) 1819 1822 rdata->got_bytes += length; 1820 1823 server->total_read += length;

+14 -8

include/linux/fscache.h

··· 172 172 extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); 173 173 extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); 174 174 175 - extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, 176 - loff_t, size_t, loff_t, netfs_io_terminated_t, void *, 177 - bool); 175 + void __fscache_write_to_cache(struct fscache_cookie *cookie, 176 + struct address_space *mapping, 177 + loff_t start, size_t len, loff_t i_size, 178 + netfs_io_terminated_t term_func, 179 + void *term_func_priv, 180 + bool using_pgpriv2, bool cond); 178 181 extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); 179 182 180 183 /** ··· 600 597 * @i_size: The new size of the inode 601 598 * @term_func: The function to call upon completion 602 599 * @term_func_priv: The private data for @term_func 603 - * @caching: If PG_fscache has been set 600 + * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write 601 + * @caching: If we actually want to do the caching 604 602 * 605 603 * Helper function for a netfs to write dirty data from an inode into the cache 606 604 * object that's backing it. ··· 612 608 * marked with PG_fscache. 613 609 * 614 610 * If given, @term_func will be called upon completion and supplied with 615 - * @term_func_priv. Note that the PG_fscache flags will have been cleared by 616 - * this point, so the netfs must retain its own pin on the mapping. 611 + * @term_func_priv. Note that if @using_pgpriv2 is set, the PG_private_2 flags 612 + * will have been cleared by this point, so the netfs must retain its own pin 613 + * on the mapping. 
617 614 */ 618 615 static inline void fscache_write_to_cache(struct fscache_cookie *cookie, 619 616 struct address_space *mapping, 620 617 loff_t start, size_t len, loff_t i_size, 621 618 netfs_io_terminated_t term_func, 622 619 void *term_func_priv, 623 - bool caching) 620 + bool using_pgpriv2, bool caching) 624 621 { 625 622 if (caching) 626 623 __fscache_write_to_cache(cookie, mapping, start, len, i_size, 627 - term_func, term_func_priv, caching); 624 + term_func, term_func_priv, 625 + using_pgpriv2, caching); 628 626 else if (term_func) 629 627 term_func(term_func_priv, -ENOBUFS, false); 630 628

+102 -99

include/linux/netfs.h

··· 20 20 #include <linux/uio.h> 21 21 22 22 enum netfs_sreq_ref_trace; 23 - 24 - /* 25 - * Overload PG_private_2 to give us PG_fscache - this is used to indicate that 26 - * a page is currently backed by a local disk cache 27 - */ 28 - #define folio_test_fscache(folio) folio_test_private_2(folio) 29 - #define PageFsCache(page) PagePrivate2((page)) 30 - #define SetPageFsCache(page) SetPagePrivate2((page)) 31 - #define ClearPageFsCache(page) ClearPagePrivate2((page)) 32 - #define TestSetPageFsCache(page) TestSetPagePrivate2((page)) 33 - #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) 23 + typedef struct mempool_s mempool_t; 34 24 35 25 /** 36 - * folio_start_fscache - Start an fscache write on a folio. 26 + * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] 37 27 * @folio: The folio. 38 28 * 39 29 * Call this function before writing a folio to a local cache. Starting a 40 30 * second write before the first one finishes is not allowed. 31 + * 32 + * Note that this should no longer be used. 41 33 */ 42 - static inline void folio_start_fscache(struct folio *folio) 34 + static inline void folio_start_private_2(struct folio *folio) 43 35 { 44 36 VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); 45 37 folio_get(folio); 46 38 folio_set_private_2(folio); 47 - } 48 - 49 - /** 50 - * folio_end_fscache - End an fscache write on a folio. 51 - * @folio: The folio. 52 - * 53 - * Call this function after the folio has been written to the local cache. 54 - * This will wake any sleepers waiting on this folio. 55 - */ 56 - static inline void folio_end_fscache(struct folio *folio) 57 - { 58 - folio_end_private_2(folio); 59 - } 60 - 61 - /** 62 - * folio_wait_fscache - Wait for an fscache write on this folio to end. 63 - * @folio: The folio. 64 - * 65 - * If this folio is currently being written to a local cache, wait for 66 - * the write to finish. Another write may start after this one finishes, 67 - * unless the caller holds the folio lock. 
68 - */ 69 - static inline void folio_wait_fscache(struct folio *folio) 70 - { 71 - folio_wait_private_2(folio); 72 - } 73 - 74 - /** 75 - * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. 76 - * @folio: The folio. 77 - * 78 - * If this folio is currently being written to a local cache, wait 79 - * for the write to finish or for a fatal signal to be received. 80 - * Another write may start after this one finishes, unless the caller 81 - * holds the folio lock. 82 - * 83 - * Return: 84 - * - 0 if successful. 85 - * - -EINTR if a fatal signal was encountered. 86 - */ 87 - static inline int folio_wait_fscache_killable(struct folio *folio) 88 - { 89 - return folio_wait_private_2_killable(folio); 90 - } 91 - 92 - static inline void set_page_fscache(struct page *page) 93 - { 94 - folio_start_fscache(page_folio(page)); 95 - } 96 - 97 - static inline void end_page_fscache(struct page *page) 98 - { 99 - folio_end_private_2(page_folio(page)); 100 - } 101 - 102 - static inline void wait_on_page_fscache(struct page *page) 103 - { 104 - folio_wait_private_2(page_folio(page)); 105 - } 106 - 107 - static inline int wait_on_page_fscache_killable(struct page *page) 108 - { 109 - return folio_wait_private_2_killable(page_folio(page)); 110 39 } 111 40 112 41 /* Marks used on xarray-based buffers */ ··· 64 135 #if IS_ENABLED(CONFIG_FSCACHE) 65 136 struct fscache_cookie *cache; 66 137 #endif 138 + struct mutex wb_lock; /* Writeback serialisation */ 67 139 loff_t remote_i_size; /* Size of the remote file */ 68 140 loff_t zero_point; /* Size after which we assume there's no data 69 141 * on the server */ ··· 72 142 #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ 73 143 #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ 74 144 #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ 75 - #define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ 145 + #define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] 
Use PG_private_2 to mark 146 + * write to cache on read */ 76 147 }; 77 148 78 149 /* ··· 96 165 unsigned int dirty_len; /* Write-streaming dirty data length */ 97 166 }; 98 167 #define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ 168 + #define NETFS_FOLIO_COPY_TO_CACHE ((struct netfs_group *)0x356UL) /* Write to the cache only */ 169 + 170 + static inline bool netfs_is_folio_info(const void *priv) 171 + { 172 + return (unsigned long)priv & NETFS_FOLIO_INFO; 173 + } 174 + 175 + static inline struct netfs_folio *__netfs_folio_info(const void *priv) 176 + { 177 + if (netfs_is_folio_info(priv)) 178 + return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); 179 + return NULL; 180 + } 99 181 100 182 static inline struct netfs_folio *netfs_folio_info(struct folio *folio) 101 183 { 102 - void *priv = folio_get_private(folio); 103 - 104 - if ((unsigned long)priv & NETFS_FOLIO_INFO) 105 - return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); 106 - return NULL; 184 + return __netfs_folio_info(folio_get_private(folio)); 107 185 } 108 186 109 187 static inline struct netfs_group *netfs_folio_group(struct folio *folio) ··· 125 185 return finfo->netfs_group; 126 186 return priv; 127 187 } 188 + 189 + /* 190 + * Stream of I/O subrequests going to a particular destination, such as the 191 + * server or the local cache. This is mainly intended for writing where we may 192 + * have to write to multiple destinations concurrently. 
193 + */ 194 + struct netfs_io_stream { 195 + /* Submission tracking */ 196 + struct netfs_io_subrequest *construct; /* Op being constructed */ 197 + unsigned int submit_off; /* Folio offset we're submitting from */ 198 + unsigned int submit_len; /* Amount of data left to submit */ 199 + unsigned int submit_max_len; /* Amount I/O can be rounded up to */ 200 + void (*prepare_write)(struct netfs_io_subrequest *subreq); 201 + void (*issue_write)(struct netfs_io_subrequest *subreq); 202 + /* Collection tracking */ 203 + struct list_head subrequests; /* Contributory I/O operations */ 204 + struct netfs_io_subrequest *front; /* Op being collected */ 205 + unsigned long long collected_to; /* Position we've collected results to */ 206 + size_t transferred; /* The amount transferred from this stream */ 207 + enum netfs_io_source source; /* Where to read from/write to */ 208 + unsigned short error; /* Aggregate error for the stream */ 209 + unsigned char stream_nr; /* Index of stream in parent table */ 210 + bool avail; /* T if stream is available */ 211 + bool active; /* T if stream is active */ 212 + bool need_retry; /* T if this stream needs retrying */ 213 + bool failed; /* T if this stream failed */ 214 + }; 128 215 129 216 /* 130 217 * Resources required to do operations on a cache. 
··· 176 209 struct work_struct work; 177 210 struct list_head rreq_link; /* Link in rreq->subrequests */ 178 211 struct iov_iter io_iter; /* Iterator for this subrequest */ 179 - loff_t start; /* Where to start the I/O */ 212 + unsigned long long start; /* Where to start the I/O */ 213 + size_t max_len; /* Maximum size of the I/O */ 180 214 size_t len; /* Size of the I/O */ 181 215 size_t transferred; /* Amount of data transferred */ 182 216 refcount_t ref; 183 217 short error; /* 0 or error that occurred */ 184 218 unsigned short debug_index; /* Index in list (for debugging output) */ 219 + unsigned int nr_segs; /* Number of segs in io_iter */ 185 220 unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ 186 221 enum netfs_io_source source; /* Where to read from/write to */ 222 + unsigned char stream_nr; /* I/O stream this belongs to */ 187 223 unsigned long flags; 188 224 #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ 189 225 #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ ··· 194 224 #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ 195 225 #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ 196 226 #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ 227 + #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. 
ceph object) */ 228 + #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ 229 + #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ 230 + #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ 231 + #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ 197 232 }; 198 233 199 234 enum netfs_io_origin { 200 235 NETFS_READAHEAD, /* This read was triggered by readahead */ 201 236 NETFS_READPAGE, /* This read is a synchronous read */ 202 237 NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ 238 + NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ 203 239 NETFS_WRITEBACK, /* This write was triggered by writepages */ 204 240 NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ 205 - NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */ 206 241 NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ 207 242 NETFS_DIO_READ, /* This is a direct I/O read */ 208 243 NETFS_DIO_WRITE, /* This is a direct I/O write */ ··· 229 254 struct netfs_cache_resources cache_resources; 230 255 struct list_head proc_link; /* Link in netfs_iorequests */ 231 256 struct list_head subrequests; /* Contributory I/O operations */ 257 + struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ 258 + #define NR_IO_STREAMS 2 //wreq->nr_io_streams 259 + struct netfs_group *group; /* Writeback group being written back */ 232 260 struct iov_iter iter; /* Unencrypted-side iterator */ 233 261 struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ 234 262 void *netfs_priv; /* Private data for the netfs */ 263 + void *netfs_priv2; /* Private data for the netfs */ 235 264 struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ 236 265 unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ 237 266 unsigned int debug_id; 238 267 unsigned int rsize; /* Maximum read size (0 for none) */ 239 268 unsigned int wsize; /* 
Maximum write size (0 for none) */ 240 - unsigned int subreq_counter; /* Next subreq->debug_index */ 269 + atomic_t subreq_counter; /* Next subreq->debug_index */ 270 + unsigned int nr_group_rel; /* Number of refs to release on ->group */ 271 + spinlock_t lock; /* Lock for queuing subreqs */ 241 272 atomic_t nr_outstanding; /* Number of ops in progress */ 242 273 atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ 243 - size_t submitted; /* Amount submitted for I/O so far */ 244 - size_t len; /* Length of the request */ 245 274 size_t upper_len; /* Length can be extended to here */ 275 + unsigned long long submitted; /* Amount submitted for I/O so far */ 276 + unsigned long long len; /* Length of the request */ 246 277 size_t transferred; /* Amount to be indicated as transferred */ 247 278 short error; /* 0 or error that occurred */ 248 279 enum netfs_io_origin origin; /* Origin of the request */ 249 280 bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ 250 - loff_t i_size; /* Size of the file */ 251 - loff_t start; /* Start position */ 281 + unsigned long long i_size; /* Size of the file */ 282 + unsigned long long start; /* Start position */ 283 + atomic64_t issued_to; /* Write issuer folio cursor */ 284 + unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ 285 + unsigned long long collected_to; /* Point we've collected to */ 286 + unsigned long long cleaned_to; /* Position we've cleaned folios to */ 252 287 pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ 253 288 refcount_t ref; 254 289 unsigned long flags; ··· 272 287 #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ 273 288 #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ 274 289 #define NETFS_RREQ_BLOCKED 10 /* We blocked */ 290 + #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ 291 + #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ 292 + #define 
NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ 293 + #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark 294 + * write to cache on read */ 275 295 const struct netfs_request_ops *netfs_ops; 276 296 void (*cleanup)(struct netfs_io_request *req); 277 297 }; ··· 285 295 * Operations the network filesystem can/must provide to the helpers. 286 296 */ 287 297 struct netfs_request_ops { 288 - unsigned int io_request_size; /* Alloc size for netfs_io_request struct */ 289 - unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */ 298 + mempool_t *request_pool; 299 + mempool_t *subrequest_pool; 290 300 int (*init_request)(struct netfs_io_request *rreq, struct file *file); 291 301 void (*free_request)(struct netfs_io_request *rreq); 292 302 void (*free_subrequest)(struct netfs_io_subrequest *rreq); ··· 302 312 303 313 /* Modification handling */ 304 314 void (*update_i_size)(struct inode *inode, loff_t i_size); 315 + void (*post_modify)(struct inode *inode); 305 316 306 317 /* Write request handling */ 307 - void (*create_write_requests)(struct netfs_io_request *wreq, 308 - loff_t start, size_t len); 318 + void (*begin_writeback)(struct netfs_io_request *wreq); 319 + void (*prepare_write)(struct netfs_io_subrequest *subreq); 320 + void (*issue_write)(struct netfs_io_subrequest *subreq); 321 + void (*retry_request)(struct netfs_io_request *wreq, struct netfs_io_stream *stream); 309 322 void (*invalidate_cache)(struct netfs_io_request *wreq); 310 323 }; 311 324 ··· 343 350 netfs_io_terminated_t term_func, 344 351 void *term_func_priv); 345 352 353 + /* Write data to the cache from a netfs subrequest. 
*/ 354 + void (*issue_write)(struct netfs_io_subrequest *subreq); 355 + 346 356 /* Expand readahead request */ 347 357 void (*expand_readahead)(struct netfs_cache_resources *cres, 348 - loff_t *_start, size_t *_len, loff_t i_size); 358 + unsigned long long *_start, 359 + unsigned long long *_len, 360 + unsigned long long i_size); 349 361 350 362 /* Prepare a read operation, shortening it to a cached/uncached 351 363 * boundary as appropriate. 352 364 */ 353 365 enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq, 354 - loff_t i_size); 366 + unsigned long long i_size); 367 + 368 + /* Prepare a write subrequest, working out if we're allowed to do it 369 + * and finding out the maximum amount of data to gather before 370 + * attempting to submit. If we're not permitted to do it, the 371 + * subrequest should be marked failed. 372 + */ 373 + void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq); 355 374 356 375 /* Prepare a write operation, working out what part of the write we can 357 376 * actually do. ··· 415 410 void netfs_clear_inode_writeback(struct inode *inode, const void *aux); 416 411 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); 417 412 bool netfs_release_folio(struct folio *folio, gfp_t gfp); 418 - int netfs_launder_folio(struct folio *folio); 419 413 420 414 /* VMA operations API. 
*/ 421 415 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); ··· 430 426 iov_iter_extraction_t extraction_flags); 431 427 size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, 432 428 size_t max_size, size_t max_segs); 433 - struct netfs_io_subrequest *netfs_create_write_request( 434 - struct netfs_io_request *wreq, enum netfs_io_source dest, 435 - loff_t start, size_t len, work_func_t worker); 429 + void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); 436 430 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, 437 431 bool was_async); 438 432 void netfs_queue_write_request(struct netfs_io_subrequest *subreq); ··· 474 472 #if IS_ENABLED(CONFIG_FSCACHE) 475 473 ctx->cache = NULL; 476 474 #endif 475 + mutex_init(&ctx->wb_lock); 477 476 /* ->releasepage() drives zero_point */ 478 477 if (use_zero_point) { 479 478 ctx->zero_point = ctx->remote_i_size;

+2

include/linux/pagemap.h

··· 40 40 int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); 41 41 int filemap_fdatawait_range_keep_errors(struct address_space *mapping, 42 42 loff_t start_byte, loff_t end_byte); 43 + int filemap_invalidate_inode(struct inode *inode, bool flush, 44 + loff_t start, loff_t end); 43 45 44 46 static inline int filemap_fdatawait(struct address_space *mapping) 45 47 {

+2

include/net/9p/client.h

··· 207 207 int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, 208 208 int *err); 209 209 int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err); 210 + struct netfs_io_subrequest; 211 + void p9_client_write_subreq(struct netfs_io_subrequest *subreq); 210 212 int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); 211 213 int p9dirent_read(struct p9_client *clnt, char *buf, int len, 212 214 struct p9_dirent *dirent);

+237 -13

include/trace/events/netfs.h

··· 24 24 E_(netfs_read_trace_write_begin, "WRITEBEGN") 25 25 26 26 #define netfs_write_traces \ 27 + EM(netfs_write_trace_copy_to_cache, "COPY2CACH") \ 27 28 EM(netfs_write_trace_dio_write, "DIO-WRITE") \ 28 - EM(netfs_write_trace_launder, "LAUNDER ") \ 29 29 EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ 30 30 EM(netfs_write_trace_writeback, "WRITEBACK") \ 31 31 E_(netfs_write_trace_writethrough, "WRITETHRU") ··· 34 34 EM(NETFS_READAHEAD, "RA") \ 35 35 EM(NETFS_READPAGE, "RP") \ 36 36 EM(NETFS_READ_FOR_WRITE, "RW") \ 37 + EM(NETFS_COPY_TO_CACHE, "CC") \ 37 38 EM(NETFS_WRITEBACK, "WB") \ 38 39 EM(NETFS_WRITETHROUGH, "WT") \ 39 - EM(NETFS_LAUNDER_WRITE, "LW") \ 40 40 EM(NETFS_UNBUFFERED_WRITE, "UW") \ 41 41 EM(NETFS_DIO_READ, "DR") \ 42 42 E_(NETFS_DIO_WRITE, "DW") ··· 44 44 #define netfs_rreq_traces \ 45 45 EM(netfs_rreq_trace_assess, "ASSESS ") \ 46 46 EM(netfs_rreq_trace_copy, "COPY ") \ 47 + EM(netfs_rreq_trace_collect, "COLLECT") \ 47 48 EM(netfs_rreq_trace_done, "DONE ") \ 48 49 EM(netfs_rreq_trace_free, "FREE ") \ 49 50 EM(netfs_rreq_trace_redirty, "REDIRTY") \ 50 51 EM(netfs_rreq_trace_resubmit, "RESUBMT") \ 52 + EM(netfs_rreq_trace_set_pause, "PAUSE ") \ 51 53 EM(netfs_rreq_trace_unlock, "UNLOCK ") \ 52 54 EM(netfs_rreq_trace_unmark, "UNMARK ") \ 53 55 EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ 56 + EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ 54 57 EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ 58 + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ 55 59 E_(netfs_rreq_trace_write_done, "WR-DONE") 56 60 57 61 #define netfs_sreq_sources \ ··· 68 64 E_(NETFS_INVALID_WRITE, "INVL") 69 65 70 66 #define netfs_sreq_traces \ 67 + EM(netfs_sreq_trace_discard, "DSCRD") \ 71 68 EM(netfs_sreq_trace_download_instead, "RDOWN") \ 69 + EM(netfs_sreq_trace_fail, "FAIL ") \ 72 70 EM(netfs_sreq_trace_free, "FREE ") \ 73 71 EM(netfs_sreq_trace_limited, "LIMIT") \ 74 72 EM(netfs_sreq_trace_prepare, "PREP ") \ 73 + EM(netfs_sreq_trace_prep_failed, "PRPFL") \ 75 74 
EM(netfs_sreq_trace_resubmit_short, "SHORT") \ 75 + EM(netfs_sreq_trace_retry, "RETRY") \ 76 76 EM(netfs_sreq_trace_submit, "SUBMT") \ 77 77 EM(netfs_sreq_trace_terminated, "TERM ") \ 78 78 EM(netfs_sreq_trace_write, "WRITE") \ ··· 96 88 #define netfs_rreq_ref_traces \ 97 89 EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ 98 90 EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ 91 + EM(netfs_rreq_trace_get_work, "GET WORK ") \ 99 92 EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ 100 93 EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ 101 94 EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ ··· 104 95 EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ 105 96 EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ 106 97 EM(netfs_rreq_trace_put_work, "PUT WORK ") \ 98 + EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \ 99 + EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \ 107 100 EM(netfs_rreq_trace_see_work, "SEE WORK ") \ 108 101 E_(netfs_rreq_trace_new, "NEW ") 109 102 110 103 #define netfs_sreq_ref_traces \ 111 104 EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ 112 105 EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ 106 + EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ 113 107 EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ 114 108 EM(netfs_sreq_trace_new, "NEW ") \ 109 + EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ 115 110 EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ 116 111 EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ 112 + EM(netfs_sreq_trace_put_done, "PUT DONE ") \ 117 113 EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ 118 114 EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ 119 115 EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \ 116 + EM(netfs_sreq_trace_put_oom, "PUT OOM ") \ 120 117 EM(netfs_sreq_trace_put_wip, "PUT WIP ") \ 121 118 EM(netfs_sreq_trace_put_work, "PUT WORK ") \ 122 119 E_(netfs_sreq_trace_put_terminated, "PUT TERM ") ··· 139 124 EM(netfs_streaming_filled_page, "mod-streamw-f") \ 140 125 
EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ 141 126 /* The rest are for writeback */ \ 127 + EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ 142 128 EM(netfs_folio_trace_clear, "clear") \ 143 - EM(netfs_folio_trace_clear_s, "clear-s") \ 129 + EM(netfs_folio_trace_clear_cc, "clear-cc") \ 144 130 EM(netfs_folio_trace_clear_g, "clear-g") \ 145 - EM(netfs_folio_trace_copy_to_cache, "copy") \ 146 - EM(netfs_folio_trace_end_copy, "end-copy") \ 131 + EM(netfs_folio_trace_clear_s, "clear-s") \ 132 + EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ 147 133 EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ 148 134 EM(netfs_folio_trace_kill, "kill") \ 149 - EM(netfs_folio_trace_launder, "launder") \ 135 + EM(netfs_folio_trace_kill_cc, "kill-cc") \ 136 + EM(netfs_folio_trace_kill_g, "kill-g") \ 137 + EM(netfs_folio_trace_kill_s, "kill-s") \ 150 138 EM(netfs_folio_trace_mkwrite, "mkwrite") \ 151 139 EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ 140 + EM(netfs_folio_trace_not_under_wback, "!wback") \ 152 141 EM(netfs_folio_trace_read_gaps, "read-gaps") \ 153 - EM(netfs_folio_trace_redirty, "redirty") \ 154 142 EM(netfs_folio_trace_redirtied, "redirtied") \ 155 143 EM(netfs_folio_trace_store, "store") \ 144 + EM(netfs_folio_trace_store_copy, "store-copy") \ 156 145 EM(netfs_folio_trace_store_plus, "store+") \ 157 146 EM(netfs_folio_trace_wthru, "wthru") \ 158 147 E_(netfs_folio_trace_wthru_plus, "wthru+") 148 + 149 + #define netfs_collect_contig_traces \ 150 + EM(netfs_contig_trace_collect, "Collect") \ 151 + EM(netfs_contig_trace_jump, "-->JUMP-->") \ 152 + E_(netfs_contig_trace_unlock, "Unlock") 159 153 160 154 #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY 161 155 #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY ··· 182 158 enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); 183 159 enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); 184 160 enum netfs_folio_trace { netfs_folio_traces } __mode(byte); 161 + enum 
netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); 185 162 186 163 #endif 187 164 ··· 204 179 netfs_rreq_ref_traces; 205 180 netfs_sreq_ref_traces; 206 181 netfs_folio_traces; 182 + netfs_collect_contig_traces; 207 183 208 184 /* 209 185 * Now redefine the EM() and E_() macros to map the enums to the strings that ··· 305 279 __entry->start = sreq->start; 306 280 ), 307 281 308 - TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d", 282 + TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx e=%d", 309 283 __entry->rreq, __entry->index, 310 284 __print_symbolic(__entry->source, netfs_sreq_sources), 311 285 __print_symbolic(__entry->what, netfs_sreq_traces), ··· 345 319 __entry->start = sreq ? sreq->start : 0; 346 320 ), 347 321 348 - TP_printk("R=%08x[%d] %s f=%02x s=%llx %zx/%zx %s e=%d", 322 + TP_printk("R=%08x[%x] %s f=%02x s=%llx %zx/%zx %s e=%d", 349 323 __entry->rreq, __entry->index, 350 324 __print_symbolic(__entry->source, netfs_sreq_sources), 351 325 __entry->flags, ··· 438 412 __field(unsigned long long, start ) 439 413 __field(size_t, len ) 440 414 __field(unsigned int, flags ) 415 + __field(unsigned int, ino ) 441 416 ), 442 417 443 418 TP_fast_assign( 444 419 __entry->start = iocb->ki_pos; 445 420 __entry->len = iov_iter_count(from); 421 + __entry->ino = iocb->ki_filp->f_inode->i_ino; 446 422 __entry->flags = iocb->ki_flags; 447 423 ), 448 424 449 - TP_printk("WRITE-ITER s=%llx l=%zx f=%x", 450 - __entry->start, __entry->len, __entry->flags) 425 + TP_printk("WRITE-ITER i=%x s=%llx l=%zx f=%x", 426 + __entry->ino, __entry->start, __entry->len, __entry->flags) 451 427 ); 452 428 453 429 TRACE_EVENT(netfs_write, ··· 461 433 TP_STRUCT__entry( 462 434 __field(unsigned int, wreq ) 463 435 __field(unsigned int, cookie ) 436 + __field(unsigned int, ino ) 464 437 __field(enum netfs_write_trace, what ) 465 438 __field(unsigned long long, start ) 466 - __field(size_t, len ) 439 + __field(unsigned long long, len ) 467 440 ), 468 441 469 442 
TP_fast_assign( ··· 472 443 struct fscache_cookie *__cookie = netfs_i_cookie(__ctx); 473 444 __entry->wreq = wreq->debug_id; 474 445 __entry->cookie = __cookie ? __cookie->debug_id : 0; 446 + __entry->ino = wreq->inode->i_ino; 475 447 __entry->what = what; 476 448 __entry->start = wreq->start; 477 449 __entry->len = wreq->len; 478 450 ), 479 451 480 - TP_printk("R=%08x %s c=%08x by=%llx-%llx", 452 + TP_printk("R=%08x %s c=%08x i=%x by=%llx-%llx", 481 453 __entry->wreq, 482 454 __print_symbolic(__entry->what, netfs_write_traces), 483 455 __entry->cookie, 456 + __entry->ino, 484 457 __entry->start, __entry->start + __entry->len - 1) 458 + ); 459 + 460 + TRACE_EVENT(netfs_collect, 461 + TP_PROTO(const struct netfs_io_request *wreq), 462 + 463 + TP_ARGS(wreq), 464 + 465 + TP_STRUCT__entry( 466 + __field(unsigned int, wreq ) 467 + __field(unsigned int, len ) 468 + __field(unsigned long long, transferred ) 469 + __field(unsigned long long, start ) 470 + ), 471 + 472 + TP_fast_assign( 473 + __entry->wreq = wreq->debug_id; 474 + __entry->start = wreq->start; 475 + __entry->len = wreq->len; 476 + __entry->transferred = wreq->transferred; 477 + ), 478 + 479 + TP_printk("R=%08x s=%llx-%llx", 480 + __entry->wreq, 481 + __entry->start + __entry->transferred, 482 + __entry->start + __entry->len) 483 + ); 484 + 485 + TRACE_EVENT(netfs_collect_contig, 486 + TP_PROTO(const struct netfs_io_request *wreq, unsigned long long to, 487 + enum netfs_collect_contig_trace type), 488 + 489 + TP_ARGS(wreq, to, type), 490 + 491 + TP_STRUCT__entry( 492 + __field(unsigned int, wreq) 493 + __field(enum netfs_collect_contig_trace, type) 494 + __field(unsigned long long, contiguity) 495 + __field(unsigned long long, to) 496 + ), 497 + 498 + TP_fast_assign( 499 + __entry->wreq = wreq->debug_id; 500 + __entry->type = type; 501 + __entry->contiguity = wreq->contiguity; 502 + __entry->to = to; 503 + ), 504 + 505 + TP_printk("R=%08x %llx -> %llx %s", 506 + __entry->wreq, 507 + __entry->contiguity, 508 + 
__entry->to, 509 + __print_symbolic(__entry->type, netfs_collect_contig_traces)) 510 + ); 511 + 512 + TRACE_EVENT(netfs_collect_sreq, 513 + TP_PROTO(const struct netfs_io_request *wreq, 514 + const struct netfs_io_subrequest *subreq), 515 + 516 + TP_ARGS(wreq, subreq), 517 + 518 + TP_STRUCT__entry( 519 + __field(unsigned int, wreq ) 520 + __field(unsigned int, subreq ) 521 + __field(unsigned int, stream ) 522 + __field(unsigned int, len ) 523 + __field(unsigned int, transferred ) 524 + __field(unsigned long long, start ) 525 + ), 526 + 527 + TP_fast_assign( 528 + __entry->wreq = wreq->debug_id; 529 + __entry->subreq = subreq->debug_index; 530 + __entry->stream = subreq->stream_nr; 531 + __entry->start = subreq->start; 532 + __entry->len = subreq->len; 533 + __entry->transferred = subreq->transferred; 534 + ), 535 + 536 + TP_printk("R=%08x[%u:%02x] s=%llx t=%x/%x", 537 + __entry->wreq, __entry->stream, __entry->subreq, 538 + __entry->start, __entry->transferred, __entry->len) 539 + ); 540 + 541 + TRACE_EVENT(netfs_collect_folio, 542 + TP_PROTO(const struct netfs_io_request *wreq, 543 + const struct folio *folio, 544 + unsigned long long fend, 545 + unsigned long long collected_to), 546 + 547 + TP_ARGS(wreq, folio, fend, collected_to), 548 + 549 + TP_STRUCT__entry( 550 + __field(unsigned int, wreq ) 551 + __field(unsigned long, index ) 552 + __field(unsigned long long, fend ) 553 + __field(unsigned long long, cleaned_to ) 554 + __field(unsigned long long, collected_to ) 555 + ), 556 + 557 + TP_fast_assign( 558 + __entry->wreq = wreq->debug_id; 559 + __entry->index = folio->index; 560 + __entry->fend = fend; 561 + __entry->cleaned_to = wreq->cleaned_to; 562 + __entry->collected_to = collected_to; 563 + ), 564 + 565 + TP_printk("R=%08x ix=%05lx r=%llx-%llx t=%llx/%llx", 566 + __entry->wreq, __entry->index, 567 + (unsigned long long)__entry->index * PAGE_SIZE, __entry->fend, 568 + __entry->cleaned_to, __entry->collected_to) 569 + ); 570 + 571 + 
TRACE_EVENT(netfs_collect_state, 572 + TP_PROTO(const struct netfs_io_request *wreq, 573 + unsigned long long collected_to, 574 + unsigned int notes), 575 + 576 + TP_ARGS(wreq, collected_to, notes), 577 + 578 + TP_STRUCT__entry( 579 + __field(unsigned int, wreq ) 580 + __field(unsigned int, notes ) 581 + __field(unsigned long long, collected_to ) 582 + __field(unsigned long long, cleaned_to ) 583 + __field(unsigned long long, contiguity ) 584 + ), 585 + 586 + TP_fast_assign( 587 + __entry->wreq = wreq->debug_id; 588 + __entry->notes = notes; 589 + __entry->collected_to = collected_to; 590 + __entry->cleaned_to = wreq->cleaned_to; 591 + __entry->contiguity = wreq->contiguity; 592 + ), 593 + 594 + TP_printk("R=%08x cto=%llx fto=%llx ctg=%llx n=%x", 595 + __entry->wreq, __entry->collected_to, 596 + __entry->cleaned_to, __entry->contiguity, 597 + __entry->notes) 598 + ); 599 + 600 + TRACE_EVENT(netfs_collect_gap, 601 + TP_PROTO(const struct netfs_io_request *wreq, 602 + const struct netfs_io_stream *stream, 603 + unsigned long long jump_to, char type), 604 + 605 + TP_ARGS(wreq, stream, jump_to, type), 606 + 607 + TP_STRUCT__entry( 608 + __field(unsigned int, wreq) 609 + __field(unsigned char, stream) 610 + __field(unsigned char, type) 611 + __field(unsigned long long, from) 612 + __field(unsigned long long, to) 613 + ), 614 + 615 + TP_fast_assign( 616 + __entry->wreq = wreq->debug_id; 617 + __entry->stream = stream->stream_nr; 618 + __entry->from = stream->collected_to; 619 + __entry->to = jump_to; 620 + __entry->type = type; 621 + ), 622 + 623 + TP_printk("R=%08x[%x:] %llx->%llx %c", 624 + __entry->wreq, __entry->stream, 625 + __entry->from, __entry->to, __entry->type) 626 + ); 627 + 628 + TRACE_EVENT(netfs_collect_stream, 629 + TP_PROTO(const struct netfs_io_request *wreq, 630 + const struct netfs_io_stream *stream), 631 + 632 + TP_ARGS(wreq, stream), 633 + 634 + TP_STRUCT__entry( 635 + __field(unsigned int, wreq) 636 + __field(unsigned char, stream) 637 + 
__field(unsigned long long, collected_to) 638 + __field(unsigned long long, front) 639 + ), 640 + 641 + TP_fast_assign( 642 + __entry->wreq = wreq->debug_id; 643 + __entry->stream = stream->stream_nr; 644 + __entry->collected_to = stream->collected_to; 645 + __entry->front = stream->front ? stream->front->start : UINT_MAX; 646 + ), 647 + 648 + TP_printk("R=%08x[%x:] cto=%llx frn=%llx", 649 + __entry->wreq, __entry->stream, 650 + __entry->collected_to, __entry->front) 485 651 ); 486 652 487 653 #undef EM

+57 -3

mm/filemap.c

··· 1540 1540 * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio. 1541 1541 * @folio: The folio to wait on. 1542 1542 * 1543 - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio. 1543 + * Wait for PG_private_2 to be cleared on a folio. 1544 1544 */ 1545 1545 void folio_wait_private_2(struct folio *folio) 1546 1546 { ··· 1553 1553 * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio. 1554 1554 * @folio: The folio to wait on. 1555 1555 * 1556 - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a 1557 - * fatal signal is received by the calling task. 1556 + * Wait for PG_private_2 to be cleared on a folio or until a fatal signal is 1557 + * received by the calling task. 1558 1558 * 1559 1559 * Return: 1560 1560 * - 0 if successful. ··· 4133 4133 return try_to_free_buffers(folio); 4134 4134 } 4135 4135 EXPORT_SYMBOL(filemap_release_folio); 4136 + 4137 + /** 4138 + * filemap_invalidate_inode - Invalidate/forcibly write back a range of an inode's pagecache 4139 + * @inode: The inode to flush 4140 + * @flush: Set to write back rather than simply invalidate. 4141 + * @start: First byte in range. 4142 + * @end: Last byte in range (inclusive), or LLONG_MAX for everything from start 4143 + * onwards. 4144 + * 4145 + * Invalidate all the folios on an inode that contribute to the specified 4146 + * range, possibly writing them back first. Whilst the operation is 4147 + * undertaken, the invalidate lock is held to prevent new folios from being 4148 + * installed. The return value is whatever filemap_check_errors() reports for the mapping. 4149 + */ 4150 + int filemap_invalidate_inode(struct inode *inode, bool flush, 4151 + loff_t start, loff_t end) 4152 + { 4153 + struct address_space *mapping = inode->i_mapping; 4154 + pgoff_t first = start >> PAGE_SHIFT; 4155 + pgoff_t last = end >> PAGE_SHIFT; 4156 + pgoff_t nr = end == LLONG_MAX ? 
ULONG_MAX : last - first + 1; 4157 + 4158 + if (!mapping || !mapping->nrpages || end < start) 4159 + goto out; 4160 + 4161 + /* Prevent new folios from being added to the inode. */ 4162 + filemap_invalidate_lock(mapping); 4163 + /* Recheck nrpages now that the invalidate lock is held. */ 4164 + if (!mapping->nrpages) 4165 + goto unlock; 4166 + 4167 + unmap_mapping_pages(mapping, first, nr, false); 4168 + 4169 + /* Write back the data if we're asked to. */ 4170 + if (flush) { 4171 + struct writeback_control wbc = { 4172 + .sync_mode = WB_SYNC_ALL, 4173 + .nr_to_write = LONG_MAX, 4174 + .range_start = start, 4175 + .range_end = end, 4176 + }; 4177 + 4178 + filemap_fdatawrite_wbc(mapping, &wbc); 4179 + } 4180 + 4181 + /* Wait for writeback to complete on all folios and discard. */ 4182 + truncate_inode_pages_range(mapping, start, end); 4183 + 4184 + unlock: 4185 + filemap_invalidate_unlock(mapping); 4186 + out: 4187 + return filemap_check_errors(mapping); 4188 + } 4189 + EXPORT_SYMBOL_GPL(filemap_invalidate_inode); 4136 4190 4137 4191 #ifdef CONFIG_CACHESTAT_SYSCALL 4138 4192 /**

+1

mm/page-writeback.c

··· 2546 2546 folio_batch_release(&wbc->fbatch); 2547 2547 return NULL; 2548 2548 } 2549 + EXPORT_SYMBOL_GPL(writeback_iter); 2549 2550 2550 2551 /** 2551 2552 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.

+1

net/9p/Kconfig

··· 5 5 6 6 menuconfig NET_9P 7 7 tristate "Plan 9 Resource Sharing Support (9P2000)" 8 + select NETFS_SUPPORT 8 9 help 9 10 If you say Y here, you will get experimental support for 10 11 Plan 9 resource sharing via the 9P2000 protocol.

+49

net/9p/client.c

··· 18 18 #include <linux/sched/signal.h> 19 19 #include <linux/uaccess.h> 20 20 #include <linux/uio.h> 21 + #include <linux/netfs.h> 21 22 #include <net/9p/9p.h> 22 23 #include <linux/parser.h> 23 24 #include <linux/seq_file.h> ··· 1661 1660 return total; 1662 1661 } 1663 1662 EXPORT_SYMBOL(p9_client_write); 1663 + 1664 + void 1665 + p9_client_write_subreq(struct netfs_io_subrequest *subreq) 1666 + { 1667 + struct netfs_io_request *wreq = subreq->rreq; 1668 + struct p9_fid *fid = wreq->netfs_priv; 1669 + struct p9_client *clnt = fid->clnt; 1670 + struct p9_req_t *req; 1671 + unsigned long long start = subreq->start + subreq->transferred; 1672 + int written, len = subreq->len - subreq->transferred; 1673 + int err; 1674 + 1675 + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n", 1676 + fid->fid, start, len); 1677 + 1678 + /* Don't bother zerocopy for small IO (< 1024) */ 1679 + if (clnt->trans_mod->zc_request && len > 1024) { 1680 + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter, 1681 + 0, wreq->len, P9_ZC_HDR_SZ, "dqd", 1682 + fid->fid, start, len); 1683 + } else { 1684 + req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, 1685 + start, len, &subreq->io_iter); 1686 + } 1687 + if (IS_ERR(req)) { 1688 + netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); 1689 + return; 1690 + } 1691 + 1692 + err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); 1693 + if (err) { 1694 + trace_9p_protocol_dump(clnt, &req->rc); 1695 + p9_req_put(clnt, req); 1696 + netfs_write_subrequest_terminated(subreq, err, false); 1697 + return; 1698 + } 1699 + 1700 + if (written > len) { 1701 + pr_err("bogus RWRITE count (%d > %u)\n", written, len); 1702 + written = len; 1703 + } 1704 + 1705 + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); 1706 + 1707 + p9_req_put(clnt, req); 1708 + netfs_write_subrequest_terminated(subreq, written, false); 1709 + } 1710 + EXPORT_SYMBOL(p9_client_write_subreq); 1664 1711 1665 1712 struct p9_wstat 
*p9_client_stat(struct p9_fid *fid) 1666 1713 {