Merge branch 'writeback'

+378 -410
+1 -1
fs/nfs/callback_proc.c
··· 51 goto out_iput; 52 res->size = i_size_read(inode); 53 res->change_attr = delegation->change_attr; 54 - if (nfsi->nrequests != 0) 55 res->change_attr++; 56 res->ctime = inode->i_ctime; 57 res->mtime = inode->i_mtime;
··· 51 goto out_iput; 52 res->size = i_size_read(inode); 53 res->change_attr = delegation->change_attr; 54 + if (nfs_have_writebacks(inode)) 55 res->change_attr++; 56 res->ctime = inode->i_ctime; 57 res->mtime = inode->i_mtime;
+1 -1
fs/nfs/delegation.c
··· 1089 delegation = rcu_dereference(nfsi->delegation); 1090 if (delegation == NULL || !(delegation->type & FMODE_WRITE)) 1091 goto out; 1092 - if (nfsi->nrequests < delegation->pagemod_limit) 1093 ret = false; 1094 out: 1095 rcu_read_unlock();
··· 1089 delegation = rcu_dereference(nfsi->delegation); 1090 if (delegation == NULL || !(delegation->type & FMODE_WRITE)) 1091 goto out; 1092 + if (atomic_long_read(&nfsi->nrequests) < delegation->pagemod_limit) 1093 ret = false; 1094 out: 1095 rcu_read_unlock();
+2 -2
fs/nfs/direct.c
··· 616 struct list_head *list, 617 struct nfs_commit_info *cinfo) 618 { 619 - spin_lock(&cinfo->inode->i_lock); 620 #ifdef CONFIG_NFS_V4_1 621 if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) 622 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 623 #endif 624 nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 625 - spin_unlock(&cinfo->inode->i_lock); 626 } 627 628 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
··· 616 struct list_head *list, 617 struct nfs_commit_info *cinfo) 618 { 619 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 620 #ifdef CONFIG_NFS_V4_1 621 if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) 622 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 623 #endif 624 nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 625 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 626 } 627 628 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+5 -5
fs/nfs/inode.c
··· 1285 1286 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1287 { 1288 - struct nfs_inode *nfsi = NFS_I(inode); 1289 unsigned long ret = 0; 1290 1291 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) ··· 1314 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1315 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 1316 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 1317 - && nfsi->nrequests == 0) { 1318 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 1319 ret |= NFS_INO_INVALID_ATTR; 1320 } ··· 1822 if (new_isize != cur_isize) { 1823 /* Do we perhaps have any outstanding writes, or has 1824 * the file grown beyond our last write? */ 1825 - if (nfsi->nrequests == 0 || new_isize > cur_isize) { 1826 i_size_write(inode, new_isize); 1827 if (!have_writers) 1828 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; ··· 2011 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 2012 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 2013 INIT_LIST_HEAD(&nfsi->commit_info.list); 2014 - nfsi->nrequests = 0; 2015 - nfsi->commit_info.ncommit = 0; 2016 atomic_set(&nfsi->commit_info.rpcs_out, 0); 2017 init_rwsem(&nfsi->rmdir_sem); 2018 nfs4_init_once(nfsi); 2019 } 2020
··· 1285 1286 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1287 { 1288 unsigned long ret = 0; 1289 1290 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) ··· 1315 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1316 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 1317 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 1318 + && !nfs_have_writebacks(inode)) { 1319 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 1320 ret |= NFS_INO_INVALID_ATTR; 1321 } ··· 1823 if (new_isize != cur_isize) { 1824 /* Do we perhaps have any outstanding writes, or has 1825 * the file grown beyond our last write? */ 1826 + if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { 1827 i_size_write(inode, new_isize); 1828 if (!have_writers) 1829 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; ··· 2012 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 2013 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 2014 INIT_LIST_HEAD(&nfsi->commit_info.list); 2015 + atomic_long_set(&nfsi->nrequests, 0); 2016 + atomic_long_set(&nfsi->commit_info.ncommit, 0); 2017 atomic_set(&nfsi->commit_info.rpcs_out, 0); 2018 init_rwsem(&nfsi->rmdir_sem); 2019 + mutex_init(&nfsi->commit_mutex); 2020 nfs4_init_once(nfsi); 2021 } 2022
+19 -48
fs/nfs/pagelist.c
··· 134 /* 135 * nfs_page_group_lock - lock the head of the page group 136 * @req - request in group that is to be locked 137 - * @nonblock - if true don't block waiting for lock 138 * 139 - * this lock must be held if modifying the page group list 140 * 141 - * return 0 on success, < 0 on error: -EDELAY if nonblocking or the 142 - * result from wait_on_bit_lock 143 - * 144 - * NOTE: calling with nonblock=false should always have set the 145 - * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock 146 - * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. 147 */ 148 int 149 - nfs_page_group_lock(struct nfs_page *req, bool nonblock) 150 { 151 struct nfs_page *head = req->wb_head; 152 ··· 150 if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) 151 return 0; 152 153 - if (!nonblock) { 154 - set_bit(PG_CONTENDED1, &head->wb_flags); 155 - smp_mb__after_atomic(); 156 - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 157 - TASK_UNINTERRUPTIBLE); 158 - } 159 - 160 - return -EAGAIN; 161 - } 162 - 163 - /* 164 - * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it 165 - * @req - a request in the group 166 - * 167 - * This is a blocking call to wait for the group lock to be cleared. 168 - */ 169 - void 170 - nfs_page_group_lock_wait(struct nfs_page *req) 171 - { 172 - struct nfs_page *head = req->wb_head; 173 - 174 - WARN_ON_ONCE(head != head->wb_head); 175 - 176 - if (!test_bit(PG_HEADLOCK, &head->wb_flags)) 177 - return; 178 set_bit(PG_CONTENDED1, &head->wb_flags); 179 smp_mb__after_atomic(); 180 - wait_on_bit(&head->wb_flags, PG_HEADLOCK, 181 - TASK_UNINTERRUPTIBLE); 182 } 183 184 /* ··· 216 { 217 bool ret; 218 219 - nfs_page_group_lock(req, false); 220 ret = nfs_page_group_sync_on_bit_locked(req, bit); 221 nfs_page_group_unlock(req); 222 ··· 258 inode = page_file_mapping(req->wb_page)->host; 259 set_bit(PG_INODE_REF, &req->wb_flags); 260 kref_get(&req->wb_kref); 261 - spin_lock(&inode->i_lock); 262 - NFS_I(inode)->nrequests++; 263 - spin_unlock(&inode->i_lock); 264 } 265 } 266 } ··· 274 nfs_page_group_destroy(struct kref *kref) 275 { 276 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 277 struct nfs_page *tmp, *next; 278 279 - /* subrequests must release the ref on the head request */ 280 - if (req->wb_head != req) 281 - nfs_release_request(req->wb_head); 282 - 283 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) 284 - return; 285 286 tmp = req; 287 do { ··· 289 nfs_free_request(tmp); 290 tmp = next; 291 } while (tmp != req); 292 } 293 294 /** ··· 434 { 435 kref_put(&req->wb_kref, nfs_page_group_destroy); 436 } 437 438 /** 439 * nfs_wait_on_request - Wait for a request to complete. ··· 453 return wait_on_bit_io(&req->wb_flags, PG_BUSY, 454 TASK_UNINTERRUPTIBLE); 455 } 456 457 /* 458 * nfs_generic_pg_test - determine if requests can be coalesced ··· 1007 unsigned int bytes_left = 0; 1008 unsigned int offset, pgbase; 1009 1010 - nfs_page_group_lock(req, false); 1011 1012 subreq = req; 1013 bytes_left = subreq->wb_bytes; ··· 1029 if (mirror->pg_recoalesce) 1030 return 0; 1031 /* retry add_request for this subreq */ 1032 - nfs_page_group_lock(req, false); 1033 continue; 1034 } 1035 ··· 1126 1127 for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1128 if (midx) { 1129 - nfs_page_group_lock(req, false); 1130 1131 /* find the last request */ 1132 for (lastreq = req->wb_head;
··· 134 /* 135 * nfs_page_group_lock - lock the head of the page group 136 * @req - request in group that is to be locked 137 * 138 + * this lock must be held when traversing or modifying the page 139 + * group list 140 * 141 + * return 0 on success, < 0 on error 142 */ 143 int 144 + nfs_page_group_lock(struct nfs_page *req) 145 { 146 struct nfs_page *head = req->wb_head; 147 ··· 155 if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) 156 return 0; 157 158 set_bit(PG_CONTENDED1, &head->wb_flags); 159 smp_mb__after_atomic(); 160 + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 161 + TASK_UNINTERRUPTIBLE); 162 } 163 164 /* ··· 246 { 247 bool ret; 248 249 + nfs_page_group_lock(req); 250 ret = nfs_page_group_sync_on_bit_locked(req, bit); 251 nfs_page_group_unlock(req); 252 ··· 288 inode = page_file_mapping(req->wb_page)->host; 289 set_bit(PG_INODE_REF, &req->wb_flags); 290 kref_get(&req->wb_kref); 291 + atomic_long_inc(&NFS_I(inode)->nrequests); 292 } 293 } 294 } ··· 306 nfs_page_group_destroy(struct kref *kref) 307 { 308 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 309 + struct nfs_page *head = req->wb_head; 310 struct nfs_page *tmp, *next; 311 312 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) 313 + goto out; 314 315 tmp = req; 316 do { ··· 324 nfs_free_request(tmp); 325 tmp = next; 326 } while (tmp != req); 327 + out: 328 + /* subrequests must release the ref on the head request */ 329 + if (head != req) 330 + nfs_release_request(head); 331 } 332 333 /** ··· 465 { 466 kref_put(&req->wb_kref, nfs_page_group_destroy); 467 } 468 + EXPORT_SYMBOL_GPL(nfs_release_request); 469 470 /** 471 * nfs_wait_on_request - Wait for a request to complete. ··· 483 return wait_on_bit_io(&req->wb_flags, PG_BUSY, 484 TASK_UNINTERRUPTIBLE); 485 } 486 + EXPORT_SYMBOL_GPL(nfs_wait_on_request); 487 488 /* 489 * nfs_generic_pg_test - determine if requests can be coalesced ··· 1036 unsigned int bytes_left = 0; 1037 unsigned int offset, pgbase; 1038 1039 + nfs_page_group_lock(req); 1040 1041 subreq = req; 1042 bytes_left = subreq->wb_bytes; ··· 1058 if (mirror->pg_recoalesce) 1059 return 0; 1060 /* retry add_request for this subreq */ 1061 + nfs_page_group_lock(req); 1062 continue; 1063 } 1064 ··· 1155 1156 for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1157 if (midx) { 1158 + nfs_page_group_lock(req); 1159 1160 /* find the last request */ 1161 for (lastreq = req->wb_head;
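With the nonblocking mode and nfs_page_group_lock_wait() removed, nfs_page_group_lock() is now a plain blocking lock, so callers pair it directly with nfs_page_group_unlock() around any walk of the group list. A minimal caller sketch (the demo_* helper is hypothetical, based only on the call sites in this hunk):

static void demo_walk_page_group(struct nfs_page *req)
{
	struct nfs_page *tmp;

	nfs_page_group_lock(req);	/* blocks until PG_HEADLOCK is held */
	tmp = req->wb_head;
	do {
		/* inspect tmp->wb_pgbase and tmp->wb_bytes here */
		tmp = tmp->wb_this_page;
	} while (tmp != req->wb_head);
	nfs_page_group_unlock(req);
}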
-41
fs/nfs/pnfs.c
··· 529 } 530 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 531 532 - static void pnfs_free_lseg_async_work(struct work_struct *work) 533 - { 534 - struct pnfs_layout_segment *lseg; 535 - struct pnfs_layout_hdr *lo; 536 - 537 - lseg = container_of(work, struct pnfs_layout_segment, pls_work); 538 - lo = lseg->pls_layout; 539 - 540 - pnfs_free_lseg(lseg); 541 - pnfs_put_layout_hdr(lo); 542 - } 543 - 544 - static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) 545 - { 546 - INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); 547 - schedule_work(&lseg->pls_work); 548 - } 549 - 550 - void 551 - pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) 552 - { 553 - if (!lseg) 554 - return; 555 - 556 - assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); 557 - 558 - dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 559 - atomic_read(&lseg->pls_refcount), 560 - test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 561 - if (atomic_dec_and_test(&lseg->pls_refcount)) { 562 - struct pnfs_layout_hdr *lo = lseg->pls_layout; 563 - if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 564 - return; 565 - pnfs_layout_remove_lseg(lo, lseg); 566 - if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) { 567 - pnfs_get_layout_hdr(lo); 568 - pnfs_free_lseg_async(lseg); 569 - } 570 - } 571 - } 572 - 573 /* 574 * is l2 fully contained in l1? 575 * start1 end1
··· 529 } 530 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 531 532 /* 533 * is l2 fully contained in l1? 534 * start1 end1
-2
fs/nfs/pnfs.h
··· 67 u32 pls_seq; 68 unsigned long pls_flags; 69 struct pnfs_layout_hdr *pls_layout; 70 - struct work_struct pls_work; 71 }; 72 73 enum pnfs_try_status { ··· 229 /* pnfs.c */ 230 void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 231 void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 232 - void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); 233 234 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); 235 void unset_pnfs_layoutdriver(struct nfs_server *);
··· 67 u32 pls_seq; 68 unsigned long pls_flags; 69 struct pnfs_layout_hdr *pls_layout; 70 }; 71 72 enum pnfs_try_status { ··· 230 /* pnfs.c */ 231 void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 232 void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 233 234 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); 235 void unset_pnfs_layoutdriver(struct nfs_server *);
+23 -14
fs/nfs/pnfs_nfs.c
··· 83 } 84 out: 85 nfs_request_remove_commit_list(req, cinfo); 86 - pnfs_put_lseg_locked(freeme); 87 } 88 EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); 89 ··· 91 pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, 92 struct nfs_commit_info *cinfo, int max) 93 { 94 - struct nfs_page *req, *tmp; 95 int ret = 0; 96 97 - list_for_each_entry_safe(req, tmp, src, wb_list) { 98 - if (!nfs_lock_request(req)) 99 - continue; 100 kref_get(&req->wb_kref); 101 - if (cond_resched_lock(&cinfo->inode->i_lock)) 102 - list_safe_reset_next(req, tmp, wb_list); 103 nfs_request_remove_commit_list(req, cinfo); 104 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 105 nfs_list_add_request(req, dst); 106 ret++; 107 if ((ret == max) && !cinfo->dreq) 108 break; 109 } 110 return ret; 111 } ··· 128 struct list_head *dst = &bucket->committing; 129 int ret; 130 131 - lockdep_assert_held(&cinfo->inode->i_lock); 132 ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); 133 if (ret) { 134 cinfo->ds->nwritten -= ret; ··· 136 if (bucket->clseg == NULL) 137 bucket->clseg = pnfs_get_lseg(bucket->wlseg); 138 if (list_empty(src)) { 139 - pnfs_put_lseg_locked(bucket->wlseg); 140 bucket->wlseg = NULL; 141 } 142 } ··· 151 { 152 int i, rv = 0, cnt; 153 154 - lockdep_assert_held(&cinfo->inode->i_lock); 155 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { 156 cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], 157 cinfo, max); ··· 171 int nwritten; 172 int i; 173 174 - lockdep_assert_held(&cinfo->inode->i_lock); 175 restart: 176 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 177 nwritten = pnfs_generic_transfer_commit_list(&b->written, ··· 962 struct list_head *list; 963 struct pnfs_commit_bucket *buckets; 964 965 - spin_lock(&cinfo->inode->i_lock); 966 buckets = cinfo->ds->buckets; 967 list = &buckets[ds_commit_idx].written; 968 if (list_empty(list)) { 969 if (!pnfs_is_valid_lseg(lseg)) { 970 - spin_unlock(&cinfo->inode->i_lock); 971 cinfo->completion_ops->resched_write(cinfo, req); 972 return; 973 } ··· 984 cinfo->ds->nwritten++; 985 986 nfs_request_add_commit_list_locked(req, list, cinfo); 987 - spin_unlock(&cinfo->inode->i_lock); 988 nfs_mark_page_unstable(req->wb_page, cinfo); 989 } 990 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
··· 83 } 84 out: 85 nfs_request_remove_commit_list(req, cinfo); 86 + pnfs_put_lseg(freeme); 87 } 88 EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); 89 ··· 91 pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, 92 struct nfs_commit_info *cinfo, int max) 93 { 94 + struct nfs_page *req; 95 int ret = 0; 96 97 + while(!list_empty(src)) { 98 + req = list_first_entry(src, struct nfs_page, wb_list); 99 + 100 kref_get(&req->wb_kref); 101 + if (!nfs_lock_request(req)) { 102 + int status; 103 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 104 + status = nfs_wait_on_request(req); 105 + nfs_release_request(req); 106 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 107 + if (status < 0) 108 + break; 109 + continue; 110 + } 111 nfs_request_remove_commit_list(req, cinfo); 112 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 113 nfs_list_add_request(req, dst); 114 ret++; 115 if ((ret == max) && !cinfo->dreq) 116 break; 117 + cond_resched(); 118 } 119 return ret; 120 } ··· 119 struct list_head *dst = &bucket->committing; 120 int ret; 121 122 + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); 123 ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); 124 if (ret) { 125 cinfo->ds->nwritten -= ret; ··· 127 if (bucket->clseg == NULL) 128 bucket->clseg = pnfs_get_lseg(bucket->wlseg); 129 if (list_empty(src)) { 130 + pnfs_put_lseg(bucket->wlseg); 131 bucket->wlseg = NULL; 132 } 133 } ··· 142 { 143 int i, rv = 0, cnt; 144 145 + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); 146 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { 147 cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], 148 cinfo, max); ··· 162 int nwritten; 163 int i; 164 165 + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); 166 restart: 167 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 168 nwritten = pnfs_generic_transfer_commit_list(&b->written, ··· 953 struct list_head *list; 954 struct pnfs_commit_bucket *buckets; 955 956 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 957 buckets = cinfo->ds->buckets; 958 list = &buckets[ds_commit_idx].written; 959 if (list_empty(list)) { 960 if (!pnfs_is_valid_lseg(lseg)) { 961 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 962 cinfo->completion_ops->resched_write(cinfo, req); 963 return; 964 } ··· 975 cinfo->ds->nwritten++; 976 977 nfs_request_add_commit_list_locked(req, list, cinfo); 978 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 979 nfs_mark_page_unstable(req->wb_page, cinfo); 980 } 981 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
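pnfs_generic_transfer_commit_list() above and nfs_scan_commit_list() in fs/nfs/write.c now share the same idiom: never sleep on a locked request while holding commit_mutex; drop the mutex, wait, then retake it and retry the scan. A condensed sketch of that idiom (the demo_* helper is hypothetical):

static int demo_lock_commit_request(struct nfs_commit_info *cinfo,
				    struct nfs_page *req)
{
	int status;

	kref_get(&req->wb_kref);
	if (nfs_lock_request(req))
		return 0;	/* locked; caller keeps the reference */

	/* Contended: release commit_mutex so the lock owner can progress */
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	status = nfs_wait_on_request(req);
	nfs_release_request(req);
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
	return status < 0 ? status : -EAGAIN;	/* caller retries the scan */
}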
+208 -236
fs/nfs/write.c
··· 154 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 155 } 156 157 /* 158 * nfs_page_find_head_request_locked - find head request associated with @page 159 * ··· 170 * returns matching head request with reference held, or NULL if not found. 171 */ 172 static struct nfs_page * 173 - nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) 174 { 175 - struct nfs_page *req = NULL; 176 177 - if (PagePrivate(page)) 178 - req = (struct nfs_page *)page_private(page); 179 - else if (unlikely(PageSwapCache(page))) 180 - req = nfs_page_search_commits_for_head_request_locked(nfsi, 181 - page); 182 - 183 if (req) { 184 WARN_ON_ONCE(req->wb_head != req); 185 kref_get(&req->wb_kref); 186 } 187 188 return req; 189 } 190 ··· 215 */ 216 static struct nfs_page *nfs_page_find_head_request(struct page *page) 217 { 218 - struct inode *inode = page_file_mapping(page)->host; 219 - struct nfs_page *req = NULL; 220 221 - spin_lock(&inode->i_lock); 222 - req = nfs_page_find_head_request_locked(NFS_I(inode), page); 223 - spin_unlock(&inode->i_lock); 224 return req; 225 } 226 ··· 268 { 269 struct nfs_page *req; 270 271 - WARN_ON_ONCE(head != head->wb_head); 272 - WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); 273 - 274 req = head; 275 do { 276 if (page_offset >= req->wb_pgbase && ··· 293 unsigned int pos = 0; 294 unsigned int len = nfs_page_length(req->wb_page); 295 296 - nfs_page_group_lock(req, false); 297 298 - do { 299 tmp = nfs_page_group_search_locked(req->wb_head, pos); 300 - if (tmp) { 301 - /* no way this should happen */ 302 - WARN_ON_ONCE(tmp->wb_pgbase != pos); 303 - pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); 304 - } 305 - } while (tmp && pos < len); 306 307 nfs_page_group_unlock(req); 308 - WARN_ON_ONCE(pos > len); 309 - return pos == len; 310 } 311 312 /* We can set the PG_uptodate flag if we see that a write request ··· 354 { 355 struct inode *inode = page_file_mapping(req->wb_page)->host; 356 struct nfs_server *nfss = NFS_SERVER(inode); 357 358 - if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) 359 return; 360 361 end_page_writeback(req->wb_page); 362 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 363 clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); 364 } 365 - 366 - 367 - /* nfs_page_group_clear_bits 368 - * @req - an nfs request 369 - * clears all page group related bits from @req 370 - */ 371 - static void 372 - nfs_page_group_clear_bits(struct nfs_page *req) 373 - { 374 - clear_bit(PG_TEARDOWN, &req->wb_flags); 375 - clear_bit(PG_UNLOCKPAGE, &req->wb_flags); 376 - clear_bit(PG_UPTODATE, &req->wb_flags); 377 - clear_bit(PG_WB_END, &req->wb_flags); 378 - clear_bit(PG_REMOVE, &req->wb_flags); 379 - } 380 - 381 382 /* 383 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req ··· 374 * @inode - inode associated with request page group, must be holding inode lock 375 * @head - head request of page group, must be holding head lock 376 * @req - request that couldn't lock and needs to wait on the req bit lock 377 - * @nonblock - if true, don't actually wait 378 * 379 - * NOTE: this must be called holding page_group bit lock and inode spin lock 380 - * and BOTH will be released before returning. 381 * 382 * returns 0 on success, < 0 on error. 
383 */ 384 - static int 385 - nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, 386 - struct nfs_page *req, bool nonblock) 387 - __releases(&inode->i_lock) 388 { 389 struct nfs_page *tmp; 390 - int ret; 391 392 /* relinquish all the locks successfully grabbed this run */ 393 - for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) 394 - nfs_unlock_request(tmp); 395 - 396 - WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); 397 - 398 - /* grab a ref on the request that will be waited on */ 399 - kref_get(&req->wb_kref); 400 - 401 - nfs_page_group_unlock(head); 402 - spin_unlock(&inode->i_lock); 403 - 404 - /* release ref from nfs_page_find_head_request_locked */ 405 - nfs_release_request(head); 406 - 407 - if (!nonblock) 408 - ret = nfs_wait_on_request(req); 409 - else 410 - ret = -EAGAIN; 411 - nfs_release_request(req); 412 - 413 - return ret; 414 } 415 416 /* ··· 406 */ 407 static void 408 nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, 409 - struct nfs_page *old_head) 410 { 411 while (destroy_list) { 412 struct nfs_page *subreq = destroy_list; ··· 418 WARN_ON_ONCE(old_head != subreq->wb_head); 419 420 /* make sure old group is not used */ 421 - subreq->wb_head = subreq; 422 subreq->wb_this_page = subreq; 423 424 /* subreq is now totally disconnected from page group or any 425 * write / commit lists. last chance to wake any waiters */ 426 - nfs_unlock_request(subreq); 427 - 428 - if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { 429 - /* release ref on old head request */ 430 - nfs_release_request(old_head); 431 - 432 - nfs_page_group_clear_bits(subreq); 433 - 434 - /* release the PG_INODE_REF reference */ 435 - if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) 436 - nfs_release_request(subreq); 437 - else 438 - WARN_ON_ONCE(1); 439 - } else { 440 - WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); 441 - /* zombie requests have already released the last 442 - * reference and were waiting on the rest of the 443 - * group to complete. Since it's no longer part of a 444 - * group, simply free the request */ 445 - nfs_page_group_clear_bits(subreq); 446 - nfs_free_request(subreq); 447 - } 448 } 449 } 450 ··· 449 * operations for this page. 450 * 451 * @page - the page used to lookup the "page group" of nfs_page structures 452 - * @nonblock - if true, don't block waiting for request locks 453 * 454 * This function joins all sub requests to the head request by first 455 * locking all requests in the group, cancelling any pending operations ··· 462 * error was encountered. 463 */ 464 static struct nfs_page * 465 - nfs_lock_and_join_requests(struct page *page, bool nonblock) 466 { 467 struct inode *inode = page_file_mapping(page)->host; 468 struct nfs_page *head, *subreq; ··· 471 int ret; 472 473 try_again: 474 - total_bytes = 0; 475 - 476 - WARN_ON_ONCE(destroy_list); 477 - 478 - spin_lock(&inode->i_lock); 479 - 480 /* 481 * A reference is taken only on the head request which acts as a 482 * reference to the whole page group - the group will not be destroyed 483 * until the head reference is released. 
484 */ 485 - head = nfs_page_find_head_request_locked(NFS_I(inode), page); 486 - 487 - if (!head) { 488 - spin_unlock(&inode->i_lock); 489 return NULL; 490 } 491 492 - /* holding inode lock, so always make a non-blocking call to try the 493 - * page group lock */ 494 - ret = nfs_page_group_lock(head, true); 495 if (ret < 0) { 496 - spin_unlock(&inode->i_lock); 497 - 498 - if (!nonblock && ret == -EAGAIN) { 499 - nfs_page_group_lock_wait(head); 500 - nfs_release_request(head); 501 - goto try_again; 502 - } 503 - 504 - nfs_release_request(head); 505 return ERR_PTR(ret); 506 } 507 508 /* lock each request in the page group */ 509 - subreq = head; 510 - do { 511 /* 512 * Subrequests are always contiguous, non overlapping 513 * and in order - but may be repeated (mirrored writes). ··· 534 } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || 535 ((subreq->wb_offset + subreq->wb_bytes) > 536 (head->wb_offset + total_bytes)))) { 537 nfs_page_group_unlock(head); 538 - spin_unlock(&inode->i_lock); 539 return ERR_PTR(-EIO); 540 } 541 - 542 - if (!nfs_lock_request(subreq)) { 543 - /* releases page group bit lock and 544 - * inode spin lock and all references */ 545 - ret = nfs_unroll_locks_and_wait(inode, head, 546 - subreq, nonblock); 547 - 548 - if (ret == 0) 549 - goto try_again; 550 - 551 - return ERR_PTR(ret); 552 - } 553 - 554 - subreq = subreq->wb_this_page; 555 - } while (subreq != head); 556 557 /* Now that all requests are locked, make sure they aren't on any list. 558 * Commit list removal accounting is done after locks are dropped */ ··· 561 head->wb_bytes = total_bytes; 562 } 563 564 - /* 565 - * prepare head request to be added to new pgio descriptor 566 - */ 567 - nfs_page_group_clear_bits(head); 568 - 569 - /* 570 - * some part of the group was still on the inode list - otherwise 571 - * the group wouldn't be involved in async write. 572 - * grab a reference for the head request, iff it needs one. 573 - */ 574 - if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) 575 kref_get(&head->wb_kref); 576 577 nfs_page_group_unlock(head); 578 579 - /* drop lock to clean uprequests on destroy list */ 580 - spin_unlock(&inode->i_lock); 581 582 - nfs_destroy_unlinked_subrequests(destroy_list, head); 583 584 - /* still holds ref on head from nfs_page_find_head_request_locked 585 * and still has lock on head from lock loop */ 586 return head; 587 } 588 589 static void nfs_write_error_remove_page(struct nfs_page *req) 590 { 591 - nfs_unlock_request(req); 592 nfs_end_page_writeback(req); 593 generic_error_remove_page(page_file_mapping(req->wb_page), 594 req->wb_page); ··· 608 * May return an error if the user signalled nfs_wait_on_request(). 609 */ 610 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 611 - struct page *page, bool nonblock) 612 { 613 struct nfs_page *req; 614 int ret = 0; 615 616 - req = nfs_lock_and_join_requests(page, nonblock); 617 if (!req) 618 goto out; 619 ret = PTR_ERR(req); ··· 656 int ret; 657 658 nfs_pageio_cond_complete(pgio, page_index(page)); 659 - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); 660 if (ret == -EAGAIN) { 661 redirty_page_for_writepage(wbc, page); 662 ret = 0; ··· 743 */ 744 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 745 { 746 struct nfs_inode *nfsi = NFS_I(inode); 747 748 WARN_ON_ONCE(req->wb_this_page != req); ··· 751 /* Lock the request! 
*/ 752 nfs_lock_request(req); 753 754 - spin_lock(&inode->i_lock); 755 - if (!nfsi->nrequests && 756 - NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) 757 - inode->i_version++; 758 /* 759 * Swap-space should not get truncated. Hence no need to plug the race 760 * with invalidate/truncate. 761 */ 762 if (likely(!PageSwapCache(req->wb_page))) { 763 set_bit(PG_MAPPED, &req->wb_flags); 764 SetPagePrivate(req->wb_page); 765 set_page_private(req->wb_page, (unsigned long)req); 766 } 767 - nfsi->nrequests++; 768 /* this a head request for a page group - mark it as having an 769 * extra reference so sub groups can follow suit. 770 * This flag also informs pgio layer when to bump nrequests when 771 * adding subrequests. */ 772 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); 773 kref_get(&req->wb_kref); 774 - spin_unlock(&inode->i_lock); 775 } 776 777 /* ··· 782 */ 783 static void nfs_inode_remove_request(struct nfs_page *req) 784 { 785 - struct inode *inode = d_inode(req->wb_context->dentry); 786 struct nfs_inode *nfsi = NFS_I(inode); 787 struct nfs_page *head; 788 789 if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { 790 head = req->wb_head; 791 792 - spin_lock(&inode->i_lock); 793 if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { 794 set_page_private(head->wb_page, 0); 795 ClearPagePrivate(head->wb_page); 796 clear_bit(PG_MAPPED, &head->wb_flags); 797 } 798 - nfsi->nrequests--; 799 - spin_unlock(&inode->i_lock); 800 - } else { 801 - spin_lock(&inode->i_lock); 802 - nfsi->nrequests--; 803 - spin_unlock(&inode->i_lock); 804 } 805 806 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) ··· 853 * number of outstanding requests requiring a commit as well as 854 * the MM page stats. 855 * 856 - * The caller must hold cinfo->inode->i_lock, and the nfs_page lock. 
857 */ 858 void 859 nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, ··· 862 { 863 set_bit(PG_CLEAN, &req->wb_flags); 864 nfs_list_add_request(req, dst); 865 - cinfo->mds->ncommit++; 866 } 867 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); 868 ··· 882 void 883 nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) 884 { 885 - spin_lock(&cinfo->inode->i_lock); 886 nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); 887 - spin_unlock(&cinfo->inode->i_lock); 888 if (req->wb_page) 889 nfs_mark_page_unstable(req->wb_page, cinfo); 890 } ··· 908 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 909 return; 910 nfs_list_remove_request(req); 911 - cinfo->mds->ncommit--; 912 } 913 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 914 ··· 953 WB_RECLAIMABLE); 954 } 955 956 - /* Called holding inode (/cinfo) lock */ 957 static void 958 nfs_clear_request_commit(struct nfs_page *req) 959 { ··· 962 struct nfs_commit_info cinfo; 963 964 nfs_init_cinfo_from_inode(&cinfo, inode); 965 if (!pnfs_clear_request_commit(req, &cinfo)) { 966 nfs_request_remove_commit_list(req, &cinfo); 967 } 968 nfs_clear_page_commit(req->wb_page); 969 } 970 } ··· 1011 remove_req: 1012 nfs_inode_remove_request(req); 1013 next: 1014 - nfs_unlock_request(req); 1015 nfs_end_page_writeback(req); 1016 nfs_release_request(req); 1017 } ··· 1022 unsigned long 1023 nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 1024 { 1025 - return cinfo->mds->ncommit; 1026 } 1027 1028 - /* cinfo->inode->i_lock held by caller */ 1029 int 1030 nfs_scan_commit_list(struct list_head *src, struct list_head *dst, 1031 struct nfs_commit_info *cinfo, int max) 1032 { 1033 - struct nfs_page *req, *tmp; 1034 int ret = 0; 1035 1036 - list_for_each_entry_safe(req, tmp, src, wb_list) { 1037 - if (!nfs_lock_request(req)) 1038 - continue; 1039 kref_get(&req->wb_kref); 1040 - if (cond_resched_lock(&cinfo->inode->i_lock)) 1041 - list_safe_reset_next(req, tmp, wb_list); 1042 nfs_request_remove_commit_list(req, cinfo); 1043 nfs_list_add_request(req, dst); 1044 ret++; 1045 if ((ret == max) && !cinfo->dreq) 1046 break; 1047 } 1048 return ret; 1049 } ··· 1071 { 1072 int ret = 0; 1073 1074 - spin_lock(&cinfo->inode->i_lock); 1075 - if (cinfo->mds->ncommit > 0) { 1076 const int max = INT_MAX; 1077 1078 ret = nfs_scan_commit_list(&cinfo->mds->list, dst, 1079 cinfo, max); 1080 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); 1081 } 1082 - spin_unlock(&cinfo->inode->i_lock); 1083 return ret; 1084 } 1085 ··· 1102 unsigned int end; 1103 int error; 1104 1105 - if (!PagePrivate(page)) 1106 - return NULL; 1107 - 1108 end = offset + bytes; 1109 - spin_lock(&inode->i_lock); 1110 1111 - for (;;) { 1112 - req = nfs_page_find_head_request_locked(NFS_I(inode), page); 1113 - if (req == NULL) 1114 - goto out_unlock; 1115 1116 - /* should be handled by nfs_flush_incompatible */ 1117 - WARN_ON_ONCE(req->wb_head != req); 1118 - WARN_ON_ONCE(req->wb_this_page != req); 1119 - 1120 - rqend = req->wb_offset + req->wb_bytes; 1121 - /* 1122 - * Tell the caller to flush out the request if 1123 - * the offsets are non-contiguous. 1124 - * Note: nfs_flush_incompatible() will already 1125 - * have flushed out requests having wrong owners. 
1126 - */ 1127 - if (offset > rqend 1128 - || end < req->wb_offset) 1129 - goto out_flushme; 1130 - 1131 - if (nfs_lock_request(req)) 1132 - break; 1133 - 1134 - /* The request is locked, so wait and then retry */ 1135 - spin_unlock(&inode->i_lock); 1136 - error = nfs_wait_on_request(req); 1137 - nfs_release_request(req); 1138 - if (error != 0) 1139 - goto out_err; 1140 - spin_lock(&inode->i_lock); 1141 - } 1142 1143 /* Okay, the request matches. Update the region */ 1144 if (offset < req->wb_offset) { ··· 1127 req->wb_bytes = end - req->wb_offset; 1128 else 1129 req->wb_bytes = rqend - req->wb_offset; 1130 - out_unlock: 1131 - if (req) 1132 - nfs_clear_request_commit(req); 1133 - spin_unlock(&inode->i_lock); 1134 return req; 1135 out_flushme: 1136 - spin_unlock(&inode->i_lock); 1137 - nfs_release_request(req); 1138 error = nfs_wb_page(inode, page); 1139 - out_err: 1140 - return ERR_PTR(error); 1141 } 1142 1143 /* ··· 1202 l_ctx = req->wb_lock_context; 1203 do_flush = req->wb_page != page || 1204 !nfs_match_open_context(req->wb_context, ctx); 1205 - /* for now, flush if more than 1 request in page_group */ 1206 - do_flush |= req->wb_this_page != req; 1207 if (l_ctx && flctx && 1208 !(list_empty_careful(&flctx->flc_posix) && 1209 list_empty_careful(&flctx->flc_flock))) { ··· 1385 { 1386 nfs_mark_request_dirty(req); 1387 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); 1388 - nfs_unlock_request(req); 1389 nfs_end_page_writeback(req); 1390 nfs_release_request(req); 1391 } ··· 1906 int ret = 0; 1907 1908 /* no commits means nothing needs to be done */ 1909 - if (!nfsi->commit_info.ncommit) 1910 return ret; 1911 1912 if (wbc->sync_mode == WB_SYNC_NONE) { ··· 1987 1988 /* blocking call to cancel all requests and join to a single (head) 1989 * request */ 1990 - req = nfs_lock_and_join_requests(page, false); 1991 1992 if (IS_ERR(req)) { 1993 ret = PTR_ERR(req);
··· 154 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 155 } 156 157 + static struct nfs_page * 158 + nfs_page_private_request(struct page *page) 159 + { 160 + if (!PagePrivate(page)) 161 + return NULL; 162 + return (struct nfs_page *)page_private(page); 163 + } 164 + 165 /* 166 * nfs_page_find_head_request_locked - find head request associated with @page 167 * ··· 162 * returns matching head request with reference held, or NULL if not found. 163 */ 164 static struct nfs_page * 165 + nfs_page_find_private_request(struct page *page) 166 { 167 + struct address_space *mapping = page_file_mapping(page); 168 + struct nfs_page *req; 169 170 + if (!PagePrivate(page)) 171 + return NULL; 172 + spin_lock(&mapping->private_lock); 173 + req = nfs_page_private_request(page); 174 if (req) { 175 WARN_ON_ONCE(req->wb_head != req); 176 kref_get(&req->wb_kref); 177 } 178 + spin_unlock(&mapping->private_lock); 179 + return req; 180 + } 181 182 + static struct nfs_page * 183 + nfs_page_find_swap_request(struct page *page) 184 + { 185 + struct inode *inode = page_file_mapping(page)->host; 186 + struct nfs_inode *nfsi = NFS_I(inode); 187 + struct nfs_page *req = NULL; 188 + if (!PageSwapCache(page)) 189 + return NULL; 190 + mutex_lock(&nfsi->commit_mutex); 191 + if (PageSwapCache(page)) { 192 + req = nfs_page_search_commits_for_head_request_locked(nfsi, 193 + page); 194 + if (req) { 195 + WARN_ON_ONCE(req->wb_head != req); 196 + kref_get(&req->wb_kref); 197 + } 198 + } 199 + mutex_unlock(&nfsi->commit_mutex); 200 return req; 201 } 202 ··· 187 */ 188 static struct nfs_page *nfs_page_find_head_request(struct page *page) 189 { 190 + struct nfs_page *req; 191 192 + req = nfs_page_find_private_request(page); 193 + if (!req) 194 + req = nfs_page_find_swap_request(page); 195 return req; 196 } 197 ··· 241 { 242 struct nfs_page *req; 243 244 req = head; 245 do { 246 if (page_offset >= req->wb_pgbase && ··· 269 unsigned int pos = 0; 270 unsigned int len = nfs_page_length(req->wb_page); 271 272 + nfs_page_group_lock(req); 273 274 + for (;;) { 275 tmp = nfs_page_group_search_locked(req->wb_head, pos); 276 + if (!tmp) 277 + break; 278 + pos = tmp->wb_pgbase + tmp->wb_bytes; 279 + } 280 281 nfs_page_group_unlock(req); 282 + return pos >= len; 283 } 284 285 /* We can set the PG_uptodate flag if we see that a write request ··· 333 { 334 struct inode *inode = page_file_mapping(req->wb_page)->host; 335 struct nfs_server *nfss = NFS_SERVER(inode); 336 + bool is_done; 337 338 + is_done = nfs_page_group_sync_on_bit(req, PG_WB_END); 339 + nfs_unlock_request(req); 340 + if (!is_done) 341 return; 342 343 end_page_writeback(req->wb_page); 344 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 345 clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); 346 } 347 348 /* 349 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req ··· 366 * @inode - inode associated with request page group, must be holding inode lock 367 * @head - head request of page group, must be holding head lock 368 * @req - request that couldn't lock and needs to wait on the req bit lock 369 * 370 + * NOTE: this must be called holding page_group bit lock 371 + * which will be released before returning. 372 * 373 * returns 0 on success, < 0 on error. 
374 */ 375 + static void 376 + nfs_unroll_locks(struct inode *inode, struct nfs_page *head, 377 + struct nfs_page *req) 378 { 379 struct nfs_page *tmp; 380 381 /* relinquish all the locks successfully grabbed this run */ 382 + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { 383 + if (!kref_read(&tmp->wb_kref)) 384 + continue; 385 + nfs_unlock_and_release_request(tmp); 386 + } 387 } 388 389 /* ··· 417 */ 418 static void 419 nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, 420 + struct nfs_page *old_head, 421 + struct inode *inode) 422 { 423 while (destroy_list) { 424 struct nfs_page *subreq = destroy_list; ··· 428 WARN_ON_ONCE(old_head != subreq->wb_head); 429 430 /* make sure old group is not used */ 431 subreq->wb_this_page = subreq; 432 + 433 + clear_bit(PG_REMOVE, &subreq->wb_flags); 434 + 435 + /* Note: races with nfs_page_group_destroy() */ 436 + if (!kref_read(&subreq->wb_kref)) { 437 + /* Check if we raced with nfs_page_group_destroy() */ 438 + if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) 439 + nfs_free_request(subreq); 440 + continue; 441 + } 442 + 443 + subreq->wb_head = subreq; 444 + 445 + if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { 446 + nfs_release_request(subreq); 447 + atomic_long_dec(&NFS_I(inode)->nrequests); 448 + } 449 450 /* subreq is now totally disconnected from page group or any 451 * write / commit lists. last chance to wake any waiters */ 452 + nfs_unlock_and_release_request(subreq); 453 } 454 } 455 ··· 464 * operations for this page. 465 * 466 * @page - the page used to lookup the "page group" of nfs_page structures 467 * 468 * This function joins all sub requests to the head request by first 469 * locking all requests in the group, cancelling any pending operations ··· 478 * error was encountered. 479 */ 480 static struct nfs_page * 481 + nfs_lock_and_join_requests(struct page *page) 482 { 483 struct inode *inode = page_file_mapping(page)->host; 484 struct nfs_page *head, *subreq; ··· 487 int ret; 488 489 try_again: 490 /* 491 * A reference is taken only on the head request which acts as a 492 * reference to the whole page group - the group will not be destroyed 493 * until the head reference is released. 
494 */ 495 + head = nfs_page_find_head_request(page); 496 + if (!head) 497 return NULL; 498 + 499 + /* lock the page head first in order to avoid an ABBA inefficiency */ 500 + if (!nfs_lock_request(head)) { 501 + ret = nfs_wait_on_request(head); 502 + nfs_release_request(head); 503 + if (ret < 0) 504 + return ERR_PTR(ret); 505 + goto try_again; 506 } 507 508 + /* Ensure that nobody removed the request before we locked it */ 509 + if (head != nfs_page_private_request(page) && !PageSwapCache(page)) { 510 + nfs_unlock_and_release_request(head); 511 + goto try_again; 512 + } 513 + 514 + ret = nfs_page_group_lock(head); 515 if (ret < 0) { 516 + nfs_unlock_and_release_request(head); 517 return ERR_PTR(ret); 518 } 519 520 /* lock each request in the page group */ 521 + total_bytes = head->wb_bytes; 522 + for (subreq = head->wb_this_page; subreq != head; 523 + subreq = subreq->wb_this_page) { 524 + 525 + if (!kref_get_unless_zero(&subreq->wb_kref)) 526 + continue; 527 + while (!nfs_lock_request(subreq)) { 528 + /* 529 + * Unlock page to allow nfs_page_group_sync_on_bit() 530 + * to succeed 531 + */ 532 + nfs_page_group_unlock(head); 533 + ret = nfs_wait_on_request(subreq); 534 + if (!ret) 535 + ret = nfs_page_group_lock(head); 536 + if (ret < 0) { 537 + nfs_unroll_locks(inode, head, subreq); 538 + nfs_release_request(subreq); 539 + nfs_unlock_and_release_request(head); 540 + return ERR_PTR(ret); 541 + } 542 + } 543 /* 544 * Subrequests are always contiguous, non overlapping 545 * and in order - but may be repeated (mirrored writes). ··· 534 } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || 535 ((subreq->wb_offset + subreq->wb_bytes) > 536 (head->wb_offset + total_bytes)))) { 537 + nfs_unroll_locks(inode, head, subreq); 538 + nfs_unlock_and_release_request(subreq); 539 nfs_page_group_unlock(head); 540 + nfs_unlock_and_release_request(head); 541 return ERR_PTR(-EIO); 542 } 543 + } 544 545 /* Now that all requests are locked, make sure they aren't on any list. 546 * Commit list removal accounting is done after locks are dropped */ ··· 573 head->wb_bytes = total_bytes; 574 } 575 576 + /* Postpone destruction of this request */ 577 + if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { 578 + set_bit(PG_INODE_REF, &head->wb_flags); 579 kref_get(&head->wb_kref); 580 + atomic_long_inc(&NFS_I(inode)->nrequests); 581 + } 582 583 nfs_page_group_unlock(head); 584 585 + nfs_destroy_unlinked_subrequests(destroy_list, head, inode); 586 587 + /* Did we lose a race with nfs_inode_remove_request()? */ 588 + if (!(PagePrivate(page) || PageSwapCache(page))) { 589 + nfs_unlock_and_release_request(head); 590 + return NULL; 591 + } 592 593 + /* still holds ref on head from nfs_page_find_head_request 594 * and still has lock on head from lock loop */ 595 return head; 596 } 597 598 static void nfs_write_error_remove_page(struct nfs_page *req) 599 { 600 nfs_end_page_writeback(req); 601 generic_error_remove_page(page_file_mapping(req->wb_page), 602 req->wb_page); ··· 624 * May return an error if the user signalled nfs_wait_on_request(). 
625 */ 626 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 627 + struct page *page) 628 { 629 struct nfs_page *req; 630 int ret = 0; 631 632 + req = nfs_lock_and_join_requests(page); 633 if (!req) 634 goto out; 635 ret = PTR_ERR(req); ··· 672 int ret; 673 674 nfs_pageio_cond_complete(pgio, page_index(page)); 675 + ret = nfs_page_async_flush(pgio, page); 676 if (ret == -EAGAIN) { 677 redirty_page_for_writepage(wbc, page); 678 ret = 0; ··· 759 */ 760 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 761 { 762 + struct address_space *mapping = page_file_mapping(req->wb_page); 763 struct nfs_inode *nfsi = NFS_I(inode); 764 765 WARN_ON_ONCE(req->wb_this_page != req); ··· 766 /* Lock the request! */ 767 nfs_lock_request(req); 768 769 /* 770 * Swap-space should not get truncated. Hence no need to plug the race 771 * with invalidate/truncate. 772 */ 773 + spin_lock(&mapping->private_lock); 774 + if (!nfs_have_writebacks(inode) && 775 + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) { 776 + spin_lock(&inode->i_lock); 777 + inode->i_version++; 778 + spin_unlock(&inode->i_lock); 779 + } 780 if (likely(!PageSwapCache(req->wb_page))) { 781 set_bit(PG_MAPPED, &req->wb_flags); 782 SetPagePrivate(req->wb_page); 783 set_page_private(req->wb_page, (unsigned long)req); 784 } 785 + spin_unlock(&mapping->private_lock); 786 + atomic_long_inc(&nfsi->nrequests); 787 /* this a head request for a page group - mark it as having an 788 * extra reference so sub groups can follow suit. 789 * This flag also informs pgio layer when to bump nrequests when 790 * adding subrequests. */ 791 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); 792 kref_get(&req->wb_kref); 793 } 794 795 /* ··· 794 */ 795 static void nfs_inode_remove_request(struct nfs_page *req) 796 { 797 + struct address_space *mapping = page_file_mapping(req->wb_page); 798 + struct inode *inode = mapping->host; 799 struct nfs_inode *nfsi = NFS_I(inode); 800 struct nfs_page *head; 801 802 + atomic_long_dec(&nfsi->nrequests); 803 if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { 804 head = req->wb_head; 805 806 + spin_lock(&mapping->private_lock); 807 if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { 808 set_page_private(head->wb_page, 0); 809 ClearPagePrivate(head->wb_page); 810 clear_bit(PG_MAPPED, &head->wb_flags); 811 } 812 + spin_unlock(&mapping->private_lock); 813 } 814 815 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) ··· 868 * number of outstanding requests requiring a commit as well as 869 * the MM page stats. 870 * 871 + * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the 872 + * nfs_page lock. 
873 */ 874 void 875 nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, ··· 876 { 877 set_bit(PG_CLEAN, &req->wb_flags); 878 nfs_list_add_request(req, dst); 879 + atomic_long_inc(&cinfo->mds->ncommit); 880 } 881 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); 882 ··· 896 void 897 nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) 898 { 899 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 900 nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); 901 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 902 if (req->wb_page) 903 nfs_mark_page_unstable(req->wb_page, cinfo); 904 } ··· 922 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 923 return; 924 nfs_list_remove_request(req); 925 + atomic_long_dec(&cinfo->mds->ncommit); 926 } 927 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 928 ··· 967 WB_RECLAIMABLE); 968 } 969 970 + /* Called holding the request lock on @req */ 971 static void 972 nfs_clear_request_commit(struct nfs_page *req) 973 { ··· 976 struct nfs_commit_info cinfo; 977 978 nfs_init_cinfo_from_inode(&cinfo, inode); 979 + mutex_lock(&NFS_I(inode)->commit_mutex); 980 if (!pnfs_clear_request_commit(req, &cinfo)) { 981 nfs_request_remove_commit_list(req, &cinfo); 982 } 983 + mutex_unlock(&NFS_I(inode)->commit_mutex); 984 nfs_clear_page_commit(req->wb_page); 985 } 986 } ··· 1023 remove_req: 1024 nfs_inode_remove_request(req); 1025 next: 1026 nfs_end_page_writeback(req); 1027 nfs_release_request(req); 1028 } ··· 1035 unsigned long 1036 nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 1037 { 1038 + return atomic_long_read(&cinfo->mds->ncommit); 1039 } 1040 1041 + /* NFS_I(cinfo->inode)->commit_mutex held by caller */ 1042 int 1043 nfs_scan_commit_list(struct list_head *src, struct list_head *dst, 1044 struct nfs_commit_info *cinfo, int max) 1045 { 1046 + struct nfs_page *req; 1047 int ret = 0; 1048 1049 + while(!list_empty(src)) { 1050 + req = list_first_entry(src, struct nfs_page, wb_list); 1051 kref_get(&req->wb_kref); 1052 + if (!nfs_lock_request(req)) { 1053 + int status; 1054 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 1055 + status = nfs_wait_on_request(req); 1056 + nfs_release_request(req); 1057 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 1058 + if (status < 0) 1059 + break; 1060 + continue; 1061 + } 1062 nfs_request_remove_commit_list(req, cinfo); 1063 nfs_list_add_request(req, dst); 1064 ret++; 1065 if ((ret == max) && !cinfo->dreq) 1066 break; 1067 + cond_resched(); 1068 } 1069 return ret; 1070 } ··· 1076 { 1077 int ret = 0; 1078 1079 + if (!atomic_long_read(&cinfo->mds->ncommit)) 1080 + return 0; 1081 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 1082 + if (atomic_long_read(&cinfo->mds->ncommit) > 0) { 1083 const int max = INT_MAX; 1084 1085 ret = nfs_scan_commit_list(&cinfo->mds->list, dst, 1086 cinfo, max); 1087 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); 1088 } 1089 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 1090 return ret; 1091 } 1092 ··· 1105 unsigned int end; 1106 int error; 1107 1108 end = offset + bytes; 1109 1110 + req = nfs_lock_and_join_requests(page); 1111 + if (IS_ERR_OR_NULL(req)) 1112 + return req; 1113 1114 + rqend = req->wb_offset + req->wb_bytes; 1115 + /* 1116 + * Tell the caller to flush out the request if 1117 + * the offsets are non-contiguous. 1118 + * Note: nfs_flush_incompatible() will already 1119 + * have flushed out requests having wrong owners. 
1120 + */ 1121 + if (offset > rqend || end < req->wb_offset) 1122 + goto out_flushme; 1123 1124 /* Okay, the request matches. Update the region */ 1125 if (offset < req->wb_offset) { ··· 1152 req->wb_bytes = end - req->wb_offset; 1153 else 1154 req->wb_bytes = rqend - req->wb_offset; 1155 return req; 1156 out_flushme: 1157 + /* 1158 + * Note: we mark the request dirty here because 1159 + * nfs_lock_and_join_requests() cannot preserve 1160 + * commit flags, so we have to replay the write. 1161 + */ 1162 + nfs_mark_request_dirty(req); 1163 + nfs_unlock_and_release_request(req); 1164 error = nfs_wb_page(inode, page); 1165 + return (error < 0) ? ERR_PTR(error) : NULL; 1166 } 1167 1168 /* ··· 1227 l_ctx = req->wb_lock_context; 1228 do_flush = req->wb_page != page || 1229 !nfs_match_open_context(req->wb_context, ctx); 1230 if (l_ctx && flctx && 1231 !(list_empty_careful(&flctx->flc_posix) && 1232 list_empty_careful(&flctx->flc_flock))) { ··· 1412 { 1413 nfs_mark_request_dirty(req); 1414 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); 1415 nfs_end_page_writeback(req); 1416 nfs_release_request(req); 1417 } ··· 1934 int ret = 0; 1935 1936 /* no commits means nothing needs to be done */ 1937 + if (!atomic_long_read(&nfsi->commit_info.ncommit)) 1938 return ret; 1939 1940 if (wbc->sync_mode == WB_SYNC_NONE) { ··· 2015 2016 /* blocking call to cancel all requests and join to a single (head) 2017 * request */ 2018 + req = nfs_lock_and_join_requests(page); 2019 2020 if (IS_ERR(req)) { 2021 ret = PTR_ERR(req);
+3 -2
include/linux/nfs_fs.h
··· 154 */ 155 __be32 cookieverf[2]; 156 157 - unsigned long nrequests; 158 struct nfs_mds_commit_info commit_info; 159 160 /* Open contexts for shared mmap writes */ ··· 163 /* Readers: in-flight sillydelete RPC calls */ 164 /* Writers: rmdir */ 165 struct rw_semaphore rmdir_sem; 166 167 #if IS_ENABLED(CONFIG_NFS_V4) 168 struct nfs4_cached_acl *nfs4_acl; ··· 511 static inline int 512 nfs_have_writebacks(struct inode *inode) 513 { 514 - return NFS_I(inode)->nrequests != 0; 515 } 516 517 /*
··· 154 */ 155 __be32 cookieverf[2]; 156 157 + atomic_long_t nrequests; 158 struct nfs_mds_commit_info commit_info; 159 160 /* Open contexts for shared mmap writes */ ··· 163 /* Readers: in-flight sillydelete RPC calls */ 164 /* Writers: rmdir */ 165 struct rw_semaphore rmdir_sem; 166 + struct mutex commit_mutex; 167 168 #if IS_ENABLED(CONFIG_NFS_V4) 169 struct nfs4_cached_acl *nfs4_acl; ··· 510 static inline int 511 nfs_have_writebacks(struct inode *inode) 512 { 513 + return atomic_long_read(&NFS_I(inode)->nrequests) != 0; 514 } 515 516 /*
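With nrequests (and commit_info.ncommit) converted to atomic_long_t, the fast paths can test for outstanding writebacks without taking any lock, while the new commit_mutex only serializes walks of the commit lists. A hedged usage sketch (the demo_* helper is hypothetical, using only the fields added above):

static bool demo_inode_needs_commit(struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	bool ret;

	if (!nfs_have_writebacks(inode))	/* lockless atomic_long_read() */
		return false;
	if (!atomic_long_read(&nfsi->commit_info.ncommit))
		return false;

	/* The commit list itself is still protected by the new mutex */
	mutex_lock(&nfsi->commit_mutex);
	ret = !list_empty(&nfsi->commit_info.list);
	mutex_unlock(&nfsi->commit_mutex);
	return ret;
}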
+1 -2
include/linux/nfs_page.h
··· 139 extern int nfs_wait_on_request(struct nfs_page *); 140 extern void nfs_unlock_request(struct nfs_page *req); 141 extern void nfs_unlock_and_release_request(struct nfs_page *); 142 - extern int nfs_page_group_lock(struct nfs_page *, bool); 143 - extern void nfs_page_group_lock_wait(struct nfs_page *); 144 extern void nfs_page_group_unlock(struct nfs_page *); 145 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); 146 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
··· 139 extern int nfs_wait_on_request(struct nfs_page *); 140 extern void nfs_unlock_request(struct nfs_page *req); 141 extern void nfs_unlock_and_release_request(struct nfs_page *); 142 + extern int nfs_page_group_lock(struct nfs_page *); 143 extern void nfs_page_group_unlock(struct nfs_page *); 144 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); 145 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
+1 -1
include/linux/nfs_xdr.h
··· 1476 1477 struct nfs_mds_commit_info { 1478 atomic_t rpcs_out; 1479 - unsigned long ncommit; 1480 struct list_head list; 1481 }; 1482
··· 1476 1477 struct nfs_mds_commit_info { 1478 atomic_t rpcs_out; 1479 + atomic_long_t ncommit; 1480 struct list_head list; 1481 }; 1482
+2
include/linux/sunrpc/sched.h
··· 139 #define RPC_TASK_RUNNING 0 140 #define RPC_TASK_QUEUED 1 141 #define RPC_TASK_ACTIVE 2 142 143 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) 144 #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
··· 139 #define RPC_TASK_RUNNING 0 140 #define RPC_TASK_QUEUED 1 141 #define RPC_TASK_ACTIVE 2 142 + #define RPC_TASK_MSG_RECV 3 143 + #define RPC_TASK_MSG_RECV_WAIT 4 144 145 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) 146 #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
+3
include/linux/sunrpc/xprt.h
··· 232 */ 233 spinlock_t transport_lock; /* lock transport info */ 234 spinlock_t reserve_lock; /* lock slot table */ 235 u32 xid; /* Next XID value to use */ 236 struct rpc_task * snd_task; /* Task blocked in send */ 237 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ ··· 373 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); 374 struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); 375 void xprt_complete_rqst(struct rpc_task *task, int copied); 376 void xprt_release_rqst_cong(struct rpc_task *task); 377 void xprt_disconnect_done(struct rpc_xprt *xprt); 378 void xprt_force_disconnect(struct rpc_xprt *xprt);
··· 232 */ 233 spinlock_t transport_lock; /* lock transport info */ 234 spinlock_t reserve_lock; /* lock slot table */ 235 + spinlock_t recv_lock; /* lock receive list */ 236 u32 xid; /* Next XID value to use */ 237 struct rpc_task * snd_task; /* Task blocked in send */ 238 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ ··· 372 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); 373 struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); 374 void xprt_complete_rqst(struct rpc_task *task, int copied); 375 + void xprt_pin_rqst(struct rpc_rqst *req); 376 + void xprt_unpin_rqst(struct rpc_rqst *req); 377 void xprt_release_rqst_cong(struct rpc_task *task); 378 void xprt_disconnect_done(struct rpc_xprt *xprt); 379 void xprt_force_disconnect(struct rpc_xprt *xprt);
+2 -2
net/sunrpc/backchannel_rqst.c
··· 171 /* 172 * Add the temporary list to the backchannel preallocation list 173 */ 174 - spin_lock_bh(&xprt->bc_pa_lock); 175 list_splice(&tmp_list, &xprt->bc_pa_list); 176 xprt_inc_alloc_count(xprt, min_reqs); 177 - spin_unlock_bh(&xprt->bc_pa_lock); 178 179 dprintk("RPC: setup backchannel transport done\n"); 180 return 0;
··· 171 /* 172 * Add the temporary list to the backchannel preallocation list 173 */ 174 + spin_lock(&xprt->bc_pa_lock); 175 list_splice(&tmp_list, &xprt->bc_pa_list); 176 xprt_inc_alloc_count(xprt, min_reqs); 177 + spin_unlock(&xprt->bc_pa_lock); 178 179 dprintk("RPC: setup backchannel transport done\n"); 180 return 0;
+3 -3
net/sunrpc/svcsock.c
··· 1001 1002 if (!bc_xprt) 1003 return -EAGAIN; 1004 - spin_lock_bh(&bc_xprt->transport_lock); 1005 req = xprt_lookup_rqst(bc_xprt, xid); 1006 if (!req) 1007 goto unlock_notfound; ··· 1019 memcpy(dst->iov_base, src->iov_base, src->iov_len); 1020 xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); 1021 rqstp->rq_arg.len = 0; 1022 - spin_unlock_bh(&bc_xprt->transport_lock); 1023 return 0; 1024 unlock_notfound: 1025 printk(KERN_NOTICE ··· 1028 __func__, ntohl(calldir), 1029 bc_xprt, ntohl(xid)); 1030 unlock_eagain: 1031 - spin_unlock_bh(&bc_xprt->transport_lock); 1032 return -EAGAIN; 1033 } 1034
··· 1001 1002 if (!bc_xprt) 1003 return -EAGAIN; 1004 + spin_lock(&bc_xprt->recv_lock); 1005 req = xprt_lookup_rqst(bc_xprt, xid); 1006 if (!req) 1007 goto unlock_notfound; ··· 1019 memcpy(dst->iov_base, src->iov_base, src->iov_len); 1020 xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); 1021 rqstp->rq_arg.len = 0; 1022 + spin_unlock(&bc_xprt->recv_lock); 1023 return 0; 1024 unlock_notfound: 1025 printk(KERN_NOTICE ··· 1028 __func__, ntohl(calldir), 1029 bc_xprt, ntohl(xid)); 1030 unlock_eagain: 1031 + spin_unlock(&bc_xprt->recv_lock); 1032 return -EAGAIN; 1033 } 1034
+51 -4
net/sunrpc/xprt.c
··· 844 } 845 EXPORT_SYMBOL_GPL(xprt_lookup_rqst); 846 847 static void xprt_update_rtt(struct rpc_task *task) 848 { 849 struct rpc_rqst *req = task->tk_rqstp; ··· 1008 /* 1009 * Add to the list only if we're expecting a reply 1010 */ 1011 - spin_lock_bh(&xprt->transport_lock); 1012 /* Update the softirq receive buffer */ 1013 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1014 sizeof(req->rq_private_buf)); 1015 /* Add request to the receive list */ 1016 list_add_tail(&req->rq_list, &xprt->recv); 1017 - spin_unlock_bh(&xprt->transport_lock); 1018 xprt_reset_majortimeo(req); 1019 /* Turn off autodisconnect */ 1020 del_singleshot_timer_sync(&xprt->timer); ··· 1329 task->tk_ops->rpc_count_stats(task, task->tk_calldata); 1330 else if (task->tk_client) 1331 rpc_count_iostats(task, task->tk_client->cl_metrics); 1332 spin_lock_bh(&xprt->transport_lock); 1333 xprt->ops->release_xprt(xprt, task); 1334 if (xprt->ops->release_request) 1335 xprt->ops->release_request(task); 1336 - if (!list_empty(&req->rq_list)) 1337 - list_del(&req->rq_list); 1338 xprt->last_used = jiffies; 1339 xprt_schedule_autodisconnect(xprt); 1340 spin_unlock_bh(&xprt->transport_lock); ··· 1364 1365 spin_lock_init(&xprt->transport_lock); 1366 spin_lock_init(&xprt->reserve_lock); 1367 1368 INIT_LIST_HEAD(&xprt->free); 1369 INIT_LIST_HEAD(&xprt->recv);
··· 844 } 845 EXPORT_SYMBOL_GPL(xprt_lookup_rqst); 846 847 + /** 848 + * xprt_pin_rqst - Pin a request on the transport receive list 849 + * @req: Request to pin 850 + * 851 + * Caller must ensure this is atomic with the call to xprt_lookup_rqst() 852 + * so should be holding the xprt->recv_lock. 853 + */ 854 + void xprt_pin_rqst(struct rpc_rqst *req) 855 + { 856 + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate); 857 + } 858 + 859 + /** 860 + * xprt_unpin_rqst - Unpin a request on the transport receive list 861 + * @req: Request to unpin 862 + * 863 + * Caller should be holding the xprt->recv_lock. 864 + */ 865 + void xprt_unpin_rqst(struct rpc_rqst *req) 866 + { 867 + struct rpc_task *task = req->rq_task; 868 + 869 + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate); 870 + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate)) 871 + wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV); 872 + } 873 + 874 + static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) 875 + __must_hold(&req->rq_xprt->recv_lock) 876 + { 877 + struct rpc_task *task = req->rq_task; 878 + 879 + if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) { 880 + spin_unlock(&req->rq_xprt->recv_lock); 881 + set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); 882 + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV, 883 + TASK_UNINTERRUPTIBLE); 884 + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); 885 + spin_lock(&req->rq_xprt->recv_lock); 886 + } 887 + } 888 + 889 static void xprt_update_rtt(struct rpc_task *task) 890 { 891 struct rpc_rqst *req = task->tk_rqstp; ··· 966 /* 967 * Add to the list only if we're expecting a reply 968 */ 969 /* Update the softirq receive buffer */ 970 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 971 sizeof(req->rq_private_buf)); 972 /* Add request to the receive list */ 973 + spin_lock(&xprt->recv_lock); 974 list_add_tail(&req->rq_list, &xprt->recv); 975 + spin_unlock(&xprt->recv_lock); 976 xprt_reset_majortimeo(req); 977 /* Turn off autodisconnect */ 978 del_singleshot_timer_sync(&xprt->timer); ··· 1287 task->tk_ops->rpc_count_stats(task, task->tk_calldata); 1288 else if (task->tk_client) 1289 rpc_count_iostats(task, task->tk_client->cl_metrics); 1290 + spin_lock(&xprt->recv_lock); 1291 + if (!list_empty(&req->rq_list)) { 1292 + list_del(&req->rq_list); 1293 + xprt_wait_on_pinned_rqst(req); 1294 + } 1295 + spin_unlock(&xprt->recv_lock); 1296 spin_lock_bh(&xprt->transport_lock); 1297 xprt->ops->release_xprt(xprt, task); 1298 if (xprt->ops->release_request) 1299 xprt->ops->release_request(task); 1300 xprt->last_used = jiffies; 1301 xprt_schedule_autodisconnect(xprt); 1302 spin_unlock_bh(&xprt->transport_lock); ··· 1318 1319 spin_lock_init(&xprt->transport_lock); 1320 spin_lock_init(&xprt->reserve_lock); 1321 + spin_lock_init(&xprt->recv_lock); 1322 1323 INIT_LIST_HEAD(&xprt->free); 1324 INIT_LIST_HEAD(&xprt->recv);
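Editor's note: xprt_release() (third hunk above) now deletes the request from the receive list and then blocks in xprt_wait_on_pinned_rqst() until any receive path that pinned it has finished. The kernel version drops recv_lock around wait_on_bit() because a spinlock cannot be held across a sleep; as a stand-alone user-space model of the same handshake (condition variable instead of runstate bits, every name below is a stand-in, not a kernel API), the logic is:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* "pinned" models RPC_TASK_MSG_RECV; the condvar models wait_on_bit()/wake_up_bit(). */
    struct model_rqst {
    	pthread_mutex_t recv_lock;	/* models xprt->recv_lock */
    	pthread_cond_t	unpinned;
    	bool		pinned;
    };

    static void model_pin_rqst(struct model_rqst *r)		/* recv_lock held */
    {
    	r->pinned = true;
    }

    static void model_unpin_rqst(struct model_rqst *r)		/* recv_lock held */
    {
    	r->pinned = false;
    	pthread_cond_broadcast(&r->unpinned);
    }

    static void model_wait_on_pinned_rqst(struct model_rqst *r)	/* recv_lock held */
    {
    	/* pthread_cond_wait() releases and reacquires the mutex, the user-space
    	 * analogue of the unlock / wait_on_bit() / lock dance in the hunk above. */
    	while (r->pinned)
    		pthread_cond_wait(&r->unpinned, &r->recv_lock);
    }

    int main(void)
    {
    	static struct model_rqst r = {
    		.recv_lock = PTHREAD_MUTEX_INITIALIZER,
    		.unpinned  = PTHREAD_COND_INITIALIZER,
    	};

    	pthread_mutex_lock(&r.recv_lock);
    	model_pin_rqst(&r);
    	model_unpin_rqst(&r);
    	model_wait_on_pinned_rqst(&r);	/* returns at once: no longer pinned */
    	pthread_mutex_unlock(&r.recv_lock);
    	printf("pin/unpin handshake ok\n");
    	return 0;
    }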
+4 -4
net/sunrpc/xprtrdma/rpc_rdma.c
··· 1051 * RPC completion while holding the transport lock to ensure 1052 * the rep, rqst, and rq_task pointers remain stable. 1053 */ 1054 - spin_lock_bh(&xprt->transport_lock); 1055 rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); 1056 if (!rqst) 1057 goto out_norqst; ··· 1136 xprt_release_rqst_cong(rqst->rq_task); 1137 1138 xprt_complete_rqst(rqst->rq_task, status); 1139 - spin_unlock_bh(&xprt->transport_lock); 1140 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", 1141 __func__, xprt, rqst, status); 1142 return; ··· 1187 r_xprt->rx_stats.bad_reply_count++; 1188 goto out; 1189 1190 - /* The req was still available, but by the time the transport_lock 1191 * was acquired, the rqst and task had been released. Thus the RPC 1192 * has already been terminated. 1193 */ 1194 out_norqst: 1195 - spin_unlock_bh(&xprt->transport_lock); 1196 rpcrdma_buffer_put(req); 1197 dprintk("RPC: %s: race, no rqst left for req %p\n", 1198 __func__, req);
··· 1051 * RPC completion while holding the transport lock to ensure 1052 * the rep, rqst, and rq_task pointers remain stable. 1053 */ 1054 + spin_lock(&xprt->recv_lock); 1055 rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); 1056 if (!rqst) 1057 goto out_norqst; ··· 1136 xprt_release_rqst_cong(rqst->rq_task); 1137 1138 xprt_complete_rqst(rqst->rq_task, status); 1139 + spin_unlock(&xprt->recv_lock); 1140 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", 1141 __func__, xprt, rqst, status); 1142 return; ··· 1187 r_xprt->rx_stats.bad_reply_count++; 1188 goto out; 1189 1190 + /* The req was still available, but by the time the recv_lock 1191 * was acquired, the rqst and task had been released. Thus the RPC 1192 * has already been terminated. 1193 */ 1194 out_norqst: 1195 + spin_unlock(&xprt->recv_lock); 1196 rpcrdma_buffer_put(req); 1197 dprintk("RPC: %s: race, no rqst left for req %p\n", 1198 __func__, req);
+5 -2
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 52 if (src->iov_len < 24) 53 goto out_shortreply; 54 55 - spin_lock_bh(&xprt->transport_lock); 56 req = xprt_lookup_rqst(xprt, xid); 57 if (!req) 58 goto out_notfound; ··· 69 else if (credits > r_xprt->rx_buf.rb_bc_max_requests) 70 credits = r_xprt->rx_buf.rb_bc_max_requests; 71 72 cwnd = xprt->cwnd; 73 xprt->cwnd = credits << RPC_CWNDSHIFT; 74 if (xprt->cwnd > cwnd) 75 xprt_release_rqst_cong(req->rq_task); 76 77 ret = 0; 78 xprt_complete_rqst(req->rq_task, rcvbuf->len); 79 rcvbuf->len = 0; 80 81 out_unlock: 82 - spin_unlock_bh(&xprt->transport_lock); 83 out: 84 return ret; 85
··· 52 if (src->iov_len < 24) 53 goto out_shortreply; 54 55 + spin_lock(&xprt->recv_lock); 56 req = xprt_lookup_rqst(xprt, xid); 57 if (!req) 58 goto out_notfound; ··· 69 else if (credits > r_xprt->rx_buf.rb_bc_max_requests) 70 credits = r_xprt->rx_buf.rb_bc_max_requests; 71 72 + spin_lock_bh(&xprt->transport_lock); 73 cwnd = xprt->cwnd; 74 xprt->cwnd = credits << RPC_CWNDSHIFT; 75 if (xprt->cwnd > cwnd) 76 xprt_release_rqst_cong(req->rq_task); 77 + spin_unlock_bh(&xprt->transport_lock); 78 + 79 80 ret = 0; 81 xprt_complete_rqst(req->rq_task, rcvbuf->len); 82 rcvbuf->len = 0; 83 84 out_unlock: 85 + spin_unlock(&xprt->recv_lock); 86 out: 87 return ret; 88
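Editor's note: the rdma backchannel hunk above also pins down the lock nesting: the credit/congestion-window update still needs transport_lock with bottom halves disabled, but it is now taken strictly inside recv_lock and released before the completion. Condensed to just the ordering it establishes (a restatement of the hunk, error paths omitted):

    spin_lock(&xprt->recv_lock);			/* protects lookup and completion */
    req = xprt_lookup_rqst(xprt, xid);
    spin_lock_bh(&xprt->transport_lock);		/* only for the cwnd/credit update */
    xprt->cwnd = credits << RPC_CWNDSHIFT;
    spin_unlock_bh(&xprt->transport_lock);
    xprt_complete_rqst(req->rq_task, rcvbuf->len);
    spin_unlock(&xprt->recv_lock);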
+44 -40
net/sunrpc/xprtsock.c
··· 969 return; 970 971 /* Look up and lock the request corresponding to the given XID */ 972 - spin_lock_bh(&xprt->transport_lock); 973 rovr = xprt_lookup_rqst(xprt, *xp); 974 if (!rovr) 975 goto out_unlock; 976 task = rovr->rq_task; 977 978 copied = rovr->rq_private_buf.buflen; ··· 983 984 if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { 985 dprintk("RPC: sk_buff copy failed\n"); 986 - goto out_unlock; 987 } 988 989 xprt_complete_rqst(task, copied); 990 - 991 out_unlock: 992 - spin_unlock_bh(&xprt->transport_lock); 993 } 994 995 static void xs_local_data_receive(struct sock_xprt *transport) ··· 1055 return; 1056 1057 /* Look up and lock the request corresponding to the given XID */ 1058 - spin_lock_bh(&xprt->transport_lock); 1059 rovr = xprt_lookup_rqst(xprt, *xp); 1060 if (!rovr) 1061 goto out_unlock; 1062 task = rovr->rq_task; 1063 1064 if ((copied = rovr->rq_private_buf.buflen) > repsize) ··· 1069 /* Suck it into the iovec, verify checksum if not done by hw. */ 1070 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1071 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); 1072 - goto out_unlock; 1073 } 1074 1075 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); 1076 1077 xprt_adjust_cwnd(xprt, task, copied); 1078 - xprt_complete_rqst(task, copied); 1079 - 1080 - out_unlock: 1081 spin_unlock_bh(&xprt->transport_lock); 1082 } 1083 1084 static void xs_udp_data_receive(struct sock_xprt *transport) ··· 1289 } 1290 1291 len = desc->count; 1292 - if (len > transport->tcp_reclen - transport->tcp_offset) { 1293 - struct xdr_skb_reader my_desc; 1294 - 1295 - len = transport->tcp_reclen - transport->tcp_offset; 1296 - memcpy(&my_desc, desc, sizeof(my_desc)); 1297 - my_desc.count = len; 1298 - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1299 - &my_desc, xdr_skb_read_bits); 1300 - desc->count -= r; 1301 - desc->offset += r; 1302 - } else 1303 - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1304 desc, xdr_skb_read_bits); 1305 1306 - if (r > 0) { 1307 - transport->tcp_copied += r; 1308 - transport->tcp_offset += r; 1309 - } 1310 - if (r != len) { 1311 /* Error when copying to the receive buffer, 1312 * usually because we weren't able to allocate 1313 * additional buffer pages. 
All we can do now ··· 1313 transport->tcp_offset, transport->tcp_reclen); 1314 return; 1315 } 1316 1317 dprintk("RPC: XID %08x read %zd bytes\n", 1318 ntohl(transport->tcp_xid), r); ··· 1346 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); 1347 1348 /* Find and lock the request corresponding to this xid */ 1349 - spin_lock_bh(&xprt->transport_lock); 1350 req = xprt_lookup_rqst(xprt, transport->tcp_xid); 1351 if (!req) { 1352 dprintk("RPC: XID %08x request not found!\n", 1353 ntohl(transport->tcp_xid)); 1354 - spin_unlock_bh(&xprt->transport_lock); 1355 return -1; 1356 } 1357 1358 xs_tcp_read_common(xprt, desc, req); 1359 1360 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1361 xprt_complete_rqst(req->rq_task, transport->tcp_copied); 1362 - 1363 - spin_unlock_bh(&xprt->transport_lock); 1364 return 0; 1365 } 1366 ··· 1382 container_of(xprt, struct sock_xprt, xprt); 1383 struct rpc_rqst *req; 1384 1385 - /* Look up and lock the request corresponding to the given XID */ 1386 - spin_lock_bh(&xprt->transport_lock); 1387 req = xprt_lookup_bc_request(xprt, transport->tcp_xid); 1388 if (req == NULL) { 1389 - spin_unlock_bh(&xprt->transport_lock); 1390 printk(KERN_WARNING "Callback slot table overflowed\n"); 1391 xprt_force_disconnect(xprt); 1392 return -1; ··· 1395 1396 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1397 xprt_complete_bc_request(req, transport->tcp_copied); 1398 - spin_unlock_bh(&xprt->transport_lock); 1399 1400 return 0; 1401 } ··· 1519 .arg.data = xprt, 1520 }; 1521 unsigned long total = 0; 1522 int read = 0; 1523 1524 mutex_lock(&transport->recv_mutex); ··· 1528 goto out; 1529 1530 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1531 - for (;;) { 1532 lock_sock(sk); 1533 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); 1534 if (read <= 0) { 1535 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); 1536 release_sock(sk); 1537 - if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1538 - break; 1539 - } else { 1540 - release_sock(sk); 1541 - total += read; 1542 } 1543 rd_desc.count = 65536; 1544 } 1545 out: 1546 mutex_unlock(&transport->recv_mutex); 1547 trace_xs_tcp_data_ready(xprt, read, total);
··· 969 return; 970 971 /* Look up and lock the request corresponding to the given XID */ 972 + spin_lock(&xprt->recv_lock); 973 rovr = xprt_lookup_rqst(xprt, *xp); 974 if (!rovr) 975 goto out_unlock; 976 + xprt_pin_rqst(rovr); 977 + spin_unlock(&xprt->recv_lock); 978 task = rovr->rq_task; 979 980 copied = rovr->rq_private_buf.buflen; ··· 981 982 if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { 983 dprintk("RPC: sk_buff copy failed\n"); 984 + spin_lock(&xprt->recv_lock); 985 + goto out_unpin; 986 } 987 988 + spin_lock(&xprt->recv_lock); 989 xprt_complete_rqst(task, copied); 990 + out_unpin: 991 + xprt_unpin_rqst(rovr); 992 out_unlock: 993 + spin_unlock(&xprt->recv_lock); 994 } 995 996 static void xs_local_data_receive(struct sock_xprt *transport) ··· 1050 return; 1051 1052 /* Look up and lock the request corresponding to the given XID */ 1053 + spin_lock(&xprt->recv_lock); 1054 rovr = xprt_lookup_rqst(xprt, *xp); 1055 if (!rovr) 1056 goto out_unlock; 1057 + xprt_pin_rqst(rovr); 1058 + spin_unlock(&xprt->recv_lock); 1059 task = rovr->rq_task; 1060 1061 if ((copied = rovr->rq_private_buf.buflen) > repsize) ··· 1062 /* Suck it into the iovec, verify checksum if not done by hw. */ 1063 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1064 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); 1065 + spin_lock(&xprt->recv_lock); 1066 + goto out_unpin; 1067 } 1068 1069 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); 1070 1071 + spin_lock_bh(&xprt->transport_lock); 1072 xprt_adjust_cwnd(xprt, task, copied); 1073 spin_unlock_bh(&xprt->transport_lock); 1074 + spin_lock(&xprt->recv_lock); 1075 + xprt_complete_rqst(task, copied); 1076 + out_unpin: 1077 + xprt_unpin_rqst(rovr); 1078 + out_unlock: 1079 + spin_unlock(&xprt->recv_lock); 1080 } 1081 1082 static void xs_udp_data_receive(struct sock_xprt *transport) ··· 1277 } 1278 1279 len = desc->count; 1280 + if (len > transport->tcp_reclen - transport->tcp_offset) 1281 + desc->count = transport->tcp_reclen - transport->tcp_offset; 1282 + r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1283 desc, xdr_skb_read_bits); 1284 1285 + if (desc->count) { 1286 /* Error when copying to the receive buffer, 1287 * usually because we weren't able to allocate 1288 * additional buffer pages. 
All we can do now ··· 1314 transport->tcp_offset, transport->tcp_reclen); 1315 return; 1316 } 1317 + 1318 + transport->tcp_copied += r; 1319 + transport->tcp_offset += r; 1320 + desc->count = len - r; 1321 1322 dprintk("RPC: XID %08x read %zd bytes\n", 1323 ntohl(transport->tcp_xid), r); ··· 1343 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); 1344 1345 /* Find and lock the request corresponding to this xid */ 1346 + spin_lock(&xprt->recv_lock); 1347 req = xprt_lookup_rqst(xprt, transport->tcp_xid); 1348 if (!req) { 1349 dprintk("RPC: XID %08x request not found!\n", 1350 ntohl(transport->tcp_xid)); 1351 + spin_unlock(&xprt->recv_lock); 1352 return -1; 1353 } 1354 + xprt_pin_rqst(req); 1355 + spin_unlock(&xprt->recv_lock); 1356 1357 xs_tcp_read_common(xprt, desc, req); 1358 1359 + spin_lock(&xprt->recv_lock); 1360 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1361 xprt_complete_rqst(req->rq_task, transport->tcp_copied); 1362 + xprt_unpin_rqst(req); 1363 + spin_unlock(&xprt->recv_lock); 1364 return 0; 1365 } 1366 ··· 1376 container_of(xprt, struct sock_xprt, xprt); 1377 struct rpc_rqst *req; 1378 1379 + /* Look up the request corresponding to the given XID */ 1380 req = xprt_lookup_bc_request(xprt, transport->tcp_xid); 1381 if (req == NULL) { 1382 printk(KERN_WARNING "Callback slot table overflowed\n"); 1383 xprt_force_disconnect(xprt); 1384 return -1; ··· 1391 1392 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1393 xprt_complete_bc_request(req, transport->tcp_copied); 1394 1395 return 0; 1396 } ··· 1516 .arg.data = xprt, 1517 }; 1518 unsigned long total = 0; 1519 + int loop; 1520 int read = 0; 1521 1522 mutex_lock(&transport->recv_mutex); ··· 1524 goto out; 1525 1526 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1527 + for (loop = 0; loop < 64; loop++) { 1528 lock_sock(sk); 1529 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); 1530 if (read <= 0) { 1531 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); 1532 release_sock(sk); 1533 + break; 1534 } 1535 + release_sock(sk); 1536 + total += read; 1537 rd_desc.count = 65536; 1538 } 1539 + if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1540 + queue_work(xprtiod_workqueue, &transport->recv_worker); 1541 out: 1542 mutex_unlock(&transport->recv_mutex); 1543 trace_xs_tcp_data_ready(xprt, read, total);