Merge branch 'writeback'

+378 -410
+1 -1
fs/nfs/callback_proc.c
··· 51 51 goto out_iput; 52 52 res->size = i_size_read(inode); 53 53 res->change_attr = delegation->change_attr; 54 - if (nfsi->nrequests != 0) 54 + if (nfs_have_writebacks(inode)) 55 55 res->change_attr++; 56 56 res->ctime = inode->i_ctime; 57 57 res->mtime = inode->i_mtime;
+1 -1
fs/nfs/delegation.c
··· 1089 1089 delegation = rcu_dereference(nfsi->delegation); 1090 1090 if (delegation == NULL || !(delegation->type & FMODE_WRITE)) 1091 1091 goto out; 1092 - if (nfsi->nrequests < delegation->pagemod_limit) 1092 + if (atomic_long_read(&nfsi->nrequests) < delegation->pagemod_limit) 1093 1093 ret = false; 1094 1094 out: 1095 1095 rcu_read_unlock();
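A common thread in the hunks above and below: NFS_I(inode)->nrequests stops being a plain unsigned long guarded by inode->i_lock and becomes an atomic_long_t, so hot paths such as this delegation check (and nfs_have_writebacks() in include/linux/nfs_fs.h further down) can read it without taking the spinlock at all. Below is a minimal userspace sketch of that pattern using C11 atomics; the function names are invented for illustration and are not NFS code.

#include <stdatomic.h>
#include <stdio.h>

/* illustrative stand-in for the per-inode nrequests counter */
static atomic_long nrequests;

static void add_request(void)    { atomic_fetch_add(&nrequests, 1); }
static void remove_request(void) { atomic_fetch_sub(&nrequests, 1); }

/* lockless read, analogous to atomic_long_read()/nfs_have_writebacks() */
static int have_writebacks(void)
{
        return atomic_load(&nrequests) != 0;
}

int main(void)
{
        add_request();
        printf("writebacks pending: %d\n", have_writebacks());
        remove_request();
        printf("writebacks pending: %d\n", have_writebacks());
        return 0;
}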
+2 -2
fs/nfs/direct.c
··· 616 616 struct list_head *list, 617 617 struct nfs_commit_info *cinfo) 618 618 { 619 - spin_lock(&cinfo->inode->i_lock); 619 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 620 620 #ifdef CONFIG_NFS_V4_1 621 621 if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) 622 622 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 623 623 #endif 624 624 nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 625 - spin_unlock(&cinfo->inode->i_lock); 625 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 626 626 } 627 627 628 628 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+5 -5
fs/nfs/inode.c
··· 1285 1285 1286 1286 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1287 1287 { 1288 - struct nfs_inode *nfsi = NFS_I(inode); 1289 1288 unsigned long ret = 0; 1290 1289 1291 1290 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) ··· 1314 1315 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1315 1316 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 1316 1317 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 1317 - && nfsi->nrequests == 0) { 1318 + && !nfs_have_writebacks(inode)) { 1318 1319 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 1319 1320 ret |= NFS_INO_INVALID_ATTR; 1320 1321 } ··· 1822 1823 if (new_isize != cur_isize) { 1823 1824 /* Do we perhaps have any outstanding writes, or has 1824 1825 * the file grown beyond our last write? */ 1825 - if (nfsi->nrequests == 0 || new_isize > cur_isize) { 1826 + if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { 1826 1827 i_size_write(inode, new_isize); 1827 1828 if (!have_writers) 1828 1829 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; ··· 2011 2012 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 2012 2013 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 2013 2014 INIT_LIST_HEAD(&nfsi->commit_info.list); 2014 - nfsi->nrequests = 0; 2015 - nfsi->commit_info.ncommit = 0; 2015 + atomic_long_set(&nfsi->nrequests, 0); 2016 + atomic_long_set(&nfsi->commit_info.ncommit, 0); 2016 2017 atomic_set(&nfsi->commit_info.rpcs_out, 0); 2017 2018 init_rwsem(&nfsi->rmdir_sem); 2019 + mutex_init(&nfsi->commit_mutex); 2018 2020 nfs4_init_once(nfsi); 2019 2021 } 2020 2022
+19 -48
fs/nfs/pagelist.c
··· 134 134 /* 135 135 * nfs_page_group_lock - lock the head of the page group 136 136 * @req - request in group that is to be locked 137 - * @nonblock - if true don't block waiting for lock 138 137 * 139 - * this lock must be held if modifying the page group list 138 + * this lock must be held when traversing or modifying the page 139 + * group list 140 140 * 141 - * return 0 on success, < 0 on error: -EDELAY if nonblocking or the 142 - * result from wait_on_bit_lock 143 - * 144 - * NOTE: calling with nonblock=false should always have set the 145 - * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock 146 - * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. 141 + * return 0 on success, < 0 on error 147 142 */ 148 143 int 149 - nfs_page_group_lock(struct nfs_page *req, bool nonblock) 144 + nfs_page_group_lock(struct nfs_page *req) 150 145 { 151 146 struct nfs_page *head = req->wb_head; 152 147 ··· 150 155 if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) 151 156 return 0; 152 157 153 - if (!nonblock) { 154 - set_bit(PG_CONTENDED1, &head->wb_flags); 155 - smp_mb__after_atomic(); 156 - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 157 - TASK_UNINTERRUPTIBLE); 158 - } 159 - 160 - return -EAGAIN; 161 - } 162 - 163 - /* 164 - * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it 165 - * @req - a request in the group 166 - * 167 - * This is a blocking call to wait for the group lock to be cleared. 168 - */ 169 - void 170 - nfs_page_group_lock_wait(struct nfs_page *req) 171 - { 172 - struct nfs_page *head = req->wb_head; 173 - 174 - WARN_ON_ONCE(head != head->wb_head); 175 - 176 - if (!test_bit(PG_HEADLOCK, &head->wb_flags)) 177 - return; 178 158 set_bit(PG_CONTENDED1, &head->wb_flags); 179 159 smp_mb__after_atomic(); 180 - wait_on_bit(&head->wb_flags, PG_HEADLOCK, 181 - TASK_UNINTERRUPTIBLE); 160 + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 161 + TASK_UNINTERRUPTIBLE); 182 162 } 183 163 184 164 /* ··· 216 246 { 217 247 bool ret; 218 248 219 - nfs_page_group_lock(req, false); 249 + nfs_page_group_lock(req); 220 250 ret = nfs_page_group_sync_on_bit_locked(req, bit); 221 251 nfs_page_group_unlock(req); 222 252 ··· 258 288 inode = page_file_mapping(req->wb_page)->host; 259 289 set_bit(PG_INODE_REF, &req->wb_flags); 260 290 kref_get(&req->wb_kref); 261 - spin_lock(&inode->i_lock); 262 - NFS_I(inode)->nrequests++; 263 - spin_unlock(&inode->i_lock); 291 + atomic_long_inc(&NFS_I(inode)->nrequests); 264 292 } 265 293 } 266 294 } ··· 274 306 nfs_page_group_destroy(struct kref *kref) 275 307 { 276 308 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 309 + struct nfs_page *head = req->wb_head; 277 310 struct nfs_page *tmp, *next; 278 311 279 - /* subrequests must release the ref on the head request */ 280 - if (req->wb_head != req) 281 - nfs_release_request(req->wb_head); 282 - 283 312 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) 284 - return; 313 + goto out; 285 314 286 315 tmp = req; 287 316 do { ··· 289 324 nfs_free_request(tmp); 290 325 tmp = next; 291 326 } while (tmp != req); 327 + out: 328 + /* subrequests must release the ref on the head request */ 329 + if (head != req) 330 + nfs_release_request(head); 292 331 } 293 332 294 333 /** ··· 434 465 { 435 466 kref_put(&req->wb_kref, nfs_page_group_destroy); 436 467 } 468 + EXPORT_SYMBOL_GPL(nfs_release_request); 437 469 438 470 /** 439 471 * nfs_wait_on_request - Wait for a request to complete. 
··· 453 483 return wait_on_bit_io(&req->wb_flags, PG_BUSY, 454 484 TASK_UNINTERRUPTIBLE); 455 485 } 486 + EXPORT_SYMBOL_GPL(nfs_wait_on_request); 456 487 457 488 /* 458 489 * nfs_generic_pg_test - determine if requests can be coalesced ··· 1007 1036 unsigned int bytes_left = 0; 1008 1037 unsigned int offset, pgbase; 1009 1038 1010 - nfs_page_group_lock(req, false); 1039 + nfs_page_group_lock(req); 1011 1040 1012 1041 subreq = req; 1013 1042 bytes_left = subreq->wb_bytes; ··· 1029 1058 if (mirror->pg_recoalesce) 1030 1059 return 0; 1031 1060 /* retry add_request for this subreq */ 1032 - nfs_page_group_lock(req, false); 1061 + nfs_page_group_lock(req); 1033 1062 continue; 1034 1063 } 1035 1064 ··· 1126 1155 1127 1156 for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1128 1157 if (midx) { 1129 - nfs_page_group_lock(req, false); 1158 + nfs_page_group_lock(req); 1130 1159 1131 1160 /* find the last request */ 1132 1161 for (lastreq = req->wb_head;
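nfs_page_group_lock() loses its nonblock mode along with the separate nfs_page_group_lock_wait() helper: the fast path is still a test_and_set_bit() on PG_HEADLOCK, and on contention the caller now always sets PG_CONTENDED1 and sleeps in wait_on_bit_lock(). The shape of that lock — an opportunistic bit test-and-set plus a contended flag so the unlock side only pays for a wakeup when somebody is actually waiting — can be sketched in userspace with C11 atomics and a condition variable. This is a loose analogy, not the kernel's wait_on_bit machinery, and every name below is invented.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct group_lock {
        atomic_bool locked;          /* analogue of PG_HEADLOCK   */
        atomic_bool contended;       /* analogue of PG_CONTENDED1 */
        pthread_mutex_t mtx;
        pthread_cond_t cv;
};

static void group_lock_acquire(struct group_lock *l)
{
        /* fast path: uncontended test-and-set, no sleeping primitives touched */
        if (!atomic_exchange(&l->locked, true))
                return;

        /* slow path: record the contention, then sleep until the bit clears */
        pthread_mutex_lock(&l->mtx);
        atomic_store(&l->contended, true);
        while (atomic_exchange(&l->locked, true)) {
                pthread_cond_wait(&l->cv, &l->mtx);
                atomic_store(&l->contended, true);   /* re-arm before re-checking */
        }
        pthread_mutex_unlock(&l->mtx);
}

static void group_lock_release(struct group_lock *l)
{
        atomic_store(&l->locked, false);
        /* only pay for a wakeup when someone recorded contention */
        if (atomic_exchange(&l->contended, false)) {
                pthread_mutex_lock(&l->mtx);
                pthread_cond_broadcast(&l->cv);
                pthread_mutex_unlock(&l->mtx);
        }
}

int main(void)
{
        struct group_lock l = {
                .mtx = PTHREAD_MUTEX_INITIALIZER,
                .cv  = PTHREAD_COND_INITIALIZER,
        };

        group_lock_acquire(&l);
        group_lock_release(&l);
        return 0;
}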
-41
fs/nfs/pnfs.c
··· 529 529 } 530 530 EXPORT_SYMBOL_GPL(pnfs_put_lseg); 531 531 532 - static void pnfs_free_lseg_async_work(struct work_struct *work) 533 - { 534 - struct pnfs_layout_segment *lseg; 535 - struct pnfs_layout_hdr *lo; 536 - 537 - lseg = container_of(work, struct pnfs_layout_segment, pls_work); 538 - lo = lseg->pls_layout; 539 - 540 - pnfs_free_lseg(lseg); 541 - pnfs_put_layout_hdr(lo); 542 - } 543 - 544 - static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) 545 - { 546 - INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); 547 - schedule_work(&lseg->pls_work); 548 - } 549 - 550 - void 551 - pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) 552 - { 553 - if (!lseg) 554 - return; 555 - 556 - assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); 557 - 558 - dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 559 - atomic_read(&lseg->pls_refcount), 560 - test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 561 - if (atomic_dec_and_test(&lseg->pls_refcount)) { 562 - struct pnfs_layout_hdr *lo = lseg->pls_layout; 563 - if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 564 - return; 565 - pnfs_layout_remove_lseg(lo, lseg); 566 - if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) { 567 - pnfs_get_layout_hdr(lo); 568 - pnfs_free_lseg_async(lseg); 569 - } 570 - } 571 - } 572 - 573 532 /* 574 533 * is l2 fully contained in l1? 575 534 * start1 end1
-2
fs/nfs/pnfs.h
··· 67 67 u32 pls_seq; 68 68 unsigned long pls_flags; 69 69 struct pnfs_layout_hdr *pls_layout; 70 - struct work_struct pls_work; 71 70 }; 72 71 73 72 enum pnfs_try_status { ··· 229 230 /* pnfs.c */ 230 231 void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 231 232 void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 232 - void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); 233 233 234 234 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); 235 235 void unset_pnfs_layoutdriver(struct nfs_server *);
+23 -14
fs/nfs/pnfs_nfs.c
··· 83 83 } 84 84 out: 85 85 nfs_request_remove_commit_list(req, cinfo); 86 - pnfs_put_lseg_locked(freeme); 86 + pnfs_put_lseg(freeme); 87 87 } 88 88 EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); 89 89 ··· 91 91 pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, 92 92 struct nfs_commit_info *cinfo, int max) 93 93 { 94 - struct nfs_page *req, *tmp; 94 + struct nfs_page *req; 95 95 int ret = 0; 96 96 97 - list_for_each_entry_safe(req, tmp, src, wb_list) { 98 - if (!nfs_lock_request(req)) 99 - continue; 97 + while(!list_empty(src)) { 98 + req = list_first_entry(src, struct nfs_page, wb_list); 99 + 100 100 kref_get(&req->wb_kref); 101 - if (cond_resched_lock(&cinfo->inode->i_lock)) 102 - list_safe_reset_next(req, tmp, wb_list); 101 + if (!nfs_lock_request(req)) { 102 + int status; 103 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 104 + status = nfs_wait_on_request(req); 105 + nfs_release_request(req); 106 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 107 + if (status < 0) 108 + break; 109 + continue; 110 + } 103 111 nfs_request_remove_commit_list(req, cinfo); 104 112 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 105 113 nfs_list_add_request(req, dst); 106 114 ret++; 107 115 if ((ret == max) && !cinfo->dreq) 108 116 break; 117 + cond_resched(); 109 118 } 110 119 return ret; 111 120 } ··· 128 119 struct list_head *dst = &bucket->committing; 129 120 int ret; 130 121 131 - lockdep_assert_held(&cinfo->inode->i_lock); 122 + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); 132 123 ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); 133 124 if (ret) { 134 125 cinfo->ds->nwritten -= ret; ··· 136 127 if (bucket->clseg == NULL) 137 128 bucket->clseg = pnfs_get_lseg(bucket->wlseg); 138 129 if (list_empty(src)) { 139 - pnfs_put_lseg_locked(bucket->wlseg); 130 + pnfs_put_lseg(bucket->wlseg); 140 131 bucket->wlseg = NULL; 141 132 } 142 133 } ··· 151 142 { 152 143 int i, rv = 0, cnt; 153 144 154 - lockdep_assert_held(&cinfo->inode->i_lock); 145 + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); 155 146 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { 156 147 cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], 157 148 cinfo, max); ··· 171 162 int nwritten; 172 163 int i; 173 164 174 - lockdep_assert_held(&cinfo->inode->i_lock); 165 + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); 175 166 restart: 176 167 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 177 168 nwritten = pnfs_generic_transfer_commit_list(&b->written, ··· 962 953 struct list_head *list; 963 954 struct pnfs_commit_bucket *buckets; 964 955 965 - spin_lock(&cinfo->inode->i_lock); 956 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 966 957 buckets = cinfo->ds->buckets; 967 958 list = &buckets[ds_commit_idx].written; 968 959 if (list_empty(list)) { 969 960 if (!pnfs_is_valid_lseg(lseg)) { 970 - spin_unlock(&cinfo->inode->i_lock); 961 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 971 962 cinfo->completion_ops->resched_write(cinfo, req); 972 963 return; 973 964 } ··· 984 975 cinfo->ds->nwritten++; 985 976 986 977 nfs_request_add_commit_list_locked(req, list, cinfo); 987 - spin_unlock(&cinfo->inode->i_lock); 978 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 988 979 nfs_mark_page_unstable(req->wb_page, cinfo); 989 980 } 990 981 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
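pnfs_generic_transfer_commit_list() (and the matching nfs_scan_commit_list() rewrite in fs/nfs/write.c below) no longer skips requests it cannot lock: it takes a reference, drops NFS_I(inode)->commit_mutex, sleeps in nfs_wait_on_request(), retakes the mutex and rescans from the head of the list. Sleeping inside the scan is exactly what forces the switch from inode->i_lock to a mutex in the first place. Below is a userspace sketch of the same drop-the-lock-to-wait-then-rescan loop, with the kref_get()/nfs_release_request() pair elided and all names invented.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
        struct item *next;
        bool busy;                  /* analogue of the per-request lock bit */
};

static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  item_unlocked = PTHREAD_COND_INITIALIZER;
static struct item *pending;        /* source list, protected by list_mutex */

/* whoever finishes with an item clears busy under the lock and wakes waiters */
static void mark_done(struct item *it)
{
        pthread_mutex_lock(&list_mutex);
        it->busy = false;
        pthread_cond_broadcast(&item_unlocked);
        pthread_mutex_unlock(&list_mutex);
}

/* Move every item off "pending": wait for busy ones instead of skipping them.
 * pthread_cond_wait() drops list_mutex while sleeping and retakes it before
 * returning, mirroring the mutex_unlock()/mutex_lock() pair in the kernel loop. */
static int drain_pending(struct item **dst)
{
        int moved = 0;

        pthread_mutex_lock(&list_mutex);
        while (pending) {
                struct item *it = pending;

                if (it->busy) {
                        while (it->busy)
                                pthread_cond_wait(&item_unlocked, &list_mutex);
                        continue;   /* rescan from the head of the list */
                }
                pending = it->next;
                it->next = *dst;
                *dst = it;
                moved++;
        }
        pthread_mutex_unlock(&list_mutex);
        return moved;
}

int main(void)
{
        struct item a = { .next = NULL, .busy = true };
        struct item *done = NULL;

        pending = &a;
        mark_done(&a);              /* pretend the in-flight I/O finished */
        printf("moved %d item(s)\n", drain_pending(&done));
        return 0;
}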
+208 -236
fs/nfs/write.c
··· 154 154 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 155 155 } 156 156 157 + static struct nfs_page * 158 + nfs_page_private_request(struct page *page) 159 + { 160 + if (!PagePrivate(page)) 161 + return NULL; 162 + return (struct nfs_page *)page_private(page); 163 + } 164 + 157 165 /* 158 166 * nfs_page_find_head_request_locked - find head request associated with @page 159 167 * ··· 170 162 * returns matching head request with reference held, or NULL if not found. 171 163 */ 172 164 static struct nfs_page * 173 - nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) 165 + nfs_page_find_private_request(struct page *page) 174 166 { 175 - struct nfs_page *req = NULL; 167 + struct address_space *mapping = page_file_mapping(page); 168 + struct nfs_page *req; 176 169 177 - if (PagePrivate(page)) 178 - req = (struct nfs_page *)page_private(page); 179 - else if (unlikely(PageSwapCache(page))) 180 - req = nfs_page_search_commits_for_head_request_locked(nfsi, 181 - page); 182 - 170 + if (!PagePrivate(page)) 171 + return NULL; 172 + spin_lock(&mapping->private_lock); 173 + req = nfs_page_private_request(page); 183 174 if (req) { 184 175 WARN_ON_ONCE(req->wb_head != req); 185 176 kref_get(&req->wb_kref); 186 177 } 178 + spin_unlock(&mapping->private_lock); 179 + return req; 180 + } 187 181 182 + static struct nfs_page * 183 + nfs_page_find_swap_request(struct page *page) 184 + { 185 + struct inode *inode = page_file_mapping(page)->host; 186 + struct nfs_inode *nfsi = NFS_I(inode); 187 + struct nfs_page *req = NULL; 188 + if (!PageSwapCache(page)) 189 + return NULL; 190 + mutex_lock(&nfsi->commit_mutex); 191 + if (PageSwapCache(page)) { 192 + req = nfs_page_search_commits_for_head_request_locked(nfsi, 193 + page); 194 + if (req) { 195 + WARN_ON_ONCE(req->wb_head != req); 196 + kref_get(&req->wb_kref); 197 + } 198 + } 199 + mutex_unlock(&nfsi->commit_mutex); 188 200 return req; 189 201 } 190 202 ··· 215 187 */ 216 188 static struct nfs_page *nfs_page_find_head_request(struct page *page) 217 189 { 218 - struct inode *inode = page_file_mapping(page)->host; 219 - struct nfs_page *req = NULL; 190 + struct nfs_page *req; 220 191 221 - spin_lock(&inode->i_lock); 222 - req = nfs_page_find_head_request_locked(NFS_I(inode), page); 223 - spin_unlock(&inode->i_lock); 192 + req = nfs_page_find_private_request(page); 193 + if (!req) 194 + req = nfs_page_find_swap_request(page); 224 195 return req; 225 196 } 226 197 ··· 268 241 { 269 242 struct nfs_page *req; 270 243 271 - WARN_ON_ONCE(head != head->wb_head); 272 - WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); 273 - 274 244 req = head; 275 245 do { 276 246 if (page_offset >= req->wb_pgbase && ··· 293 269 unsigned int pos = 0; 294 270 unsigned int len = nfs_page_length(req->wb_page); 295 271 296 - nfs_page_group_lock(req, false); 272 + nfs_page_group_lock(req); 297 273 298 - do { 274 + for (;;) { 299 275 tmp = nfs_page_group_search_locked(req->wb_head, pos); 300 - if (tmp) { 301 - /* no way this should happen */ 302 - WARN_ON_ONCE(tmp->wb_pgbase != pos); 303 - pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); 304 - } 305 - } while (tmp && pos < len); 276 + if (!tmp) 277 + break; 278 + pos = tmp->wb_pgbase + tmp->wb_bytes; 279 + } 306 280 307 281 nfs_page_group_unlock(req); 308 - WARN_ON_ONCE(pos > len); 309 - return pos == len; 282 + return pos >= len; 310 283 } 311 284 312 285 /* We can set the PG_uptodate flag if we see that a write request ··· 354 333 { 355 334 struct inode *inode = page_file_mapping(req->wb_page)->host; 
356 335 struct nfs_server *nfss = NFS_SERVER(inode); 336 + bool is_done; 357 337 358 - if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) 338 + is_done = nfs_page_group_sync_on_bit(req, PG_WB_END); 339 + nfs_unlock_request(req); 340 + if (!is_done) 359 341 return; 360 342 361 343 end_page_writeback(req->wb_page); 362 344 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 363 345 clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); 364 346 } 365 - 366 - 367 - /* nfs_page_group_clear_bits 368 - * @req - an nfs request 369 - * clears all page group related bits from @req 370 - */ 371 - static void 372 - nfs_page_group_clear_bits(struct nfs_page *req) 373 - { 374 - clear_bit(PG_TEARDOWN, &req->wb_flags); 375 - clear_bit(PG_UNLOCKPAGE, &req->wb_flags); 376 - clear_bit(PG_UPTODATE, &req->wb_flags); 377 - clear_bit(PG_WB_END, &req->wb_flags); 378 - clear_bit(PG_REMOVE, &req->wb_flags); 379 - } 380 - 381 347 382 348 /* 383 349 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req ··· 374 366 * @inode - inode associated with request page group, must be holding inode lock 375 367 * @head - head request of page group, must be holding head lock 376 368 * @req - request that couldn't lock and needs to wait on the req bit lock 377 - * @nonblock - if true, don't actually wait 378 369 * 379 - * NOTE: this must be called holding page_group bit lock and inode spin lock 380 - * and BOTH will be released before returning. 370 + * NOTE: this must be called holding page_group bit lock 371 + * which will be released before returning. 381 372 * 382 373 * returns 0 on success, < 0 on error. 383 374 */ 384 - static int 385 - nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, 386 - struct nfs_page *req, bool nonblock) 387 - __releases(&inode->i_lock) 375 + static void 376 + nfs_unroll_locks(struct inode *inode, struct nfs_page *head, 377 + struct nfs_page *req) 388 378 { 389 379 struct nfs_page *tmp; 390 - int ret; 391 380 392 381 /* relinquish all the locks successfully grabbed this run */ 393 - for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) 394 - nfs_unlock_request(tmp); 395 - 396 - WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); 397 - 398 - /* grab a ref on the request that will be waited on */ 399 - kref_get(&req->wb_kref); 400 - 401 - nfs_page_group_unlock(head); 402 - spin_unlock(&inode->i_lock); 403 - 404 - /* release ref from nfs_page_find_head_request_locked */ 405 - nfs_release_request(head); 406 - 407 - if (!nonblock) 408 - ret = nfs_wait_on_request(req); 409 - else 410 - ret = -EAGAIN; 411 - nfs_release_request(req); 412 - 413 - return ret; 382 + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { 383 + if (!kref_read(&tmp->wb_kref)) 384 + continue; 385 + nfs_unlock_and_release_request(tmp); 386 + } 414 387 } 415 388 416 389 /* ··· 406 417 */ 407 418 static void 408 419 nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, 409 - struct nfs_page *old_head) 420 + struct nfs_page *old_head, 421 + struct inode *inode) 410 422 { 411 423 while (destroy_list) { 412 424 struct nfs_page *subreq = destroy_list; ··· 418 428 WARN_ON_ONCE(old_head != subreq->wb_head); 419 429 420 430 /* make sure old group is not used */ 421 - subreq->wb_head = subreq; 422 431 subreq->wb_this_page = subreq; 432 + 433 + clear_bit(PG_REMOVE, &subreq->wb_flags); 434 + 435 + /* Note: races with nfs_page_group_destroy() */ 436 + if (!kref_read(&subreq->wb_kref)) { 437 + /* Check if we raced with nfs_page_group_destroy() */ 438 + if 
(test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) 439 + nfs_free_request(subreq); 440 + continue; 441 + } 442 + 443 + subreq->wb_head = subreq; 444 + 445 + if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { 446 + nfs_release_request(subreq); 447 + atomic_long_dec(&NFS_I(inode)->nrequests); 448 + } 423 449 424 450 /* subreq is now totally disconnected from page group or any 425 451 * write / commit lists. last chance to wake any waiters */ 426 - nfs_unlock_request(subreq); 427 - 428 - if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { 429 - /* release ref on old head request */ 430 - nfs_release_request(old_head); 431 - 432 - nfs_page_group_clear_bits(subreq); 433 - 434 - /* release the PG_INODE_REF reference */ 435 - if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) 436 - nfs_release_request(subreq); 437 - else 438 - WARN_ON_ONCE(1); 439 - } else { 440 - WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); 441 - /* zombie requests have already released the last 442 - * reference and were waiting on the rest of the 443 - * group to complete. Since it's no longer part of a 444 - * group, simply free the request */ 445 - nfs_page_group_clear_bits(subreq); 446 - nfs_free_request(subreq); 447 - } 452 + nfs_unlock_and_release_request(subreq); 448 453 } 449 454 } 450 455 ··· 449 464 * operations for this page. 450 465 * 451 466 * @page - the page used to lookup the "page group" of nfs_page structures 452 - * @nonblock - if true, don't block waiting for request locks 453 467 * 454 468 * This function joins all sub requests to the head request by first 455 469 * locking all requests in the group, cancelling any pending operations ··· 462 478 * error was encountered. 463 479 */ 464 480 static struct nfs_page * 465 - nfs_lock_and_join_requests(struct page *page, bool nonblock) 481 + nfs_lock_and_join_requests(struct page *page) 466 482 { 467 483 struct inode *inode = page_file_mapping(page)->host; 468 484 struct nfs_page *head, *subreq; ··· 471 487 int ret; 472 488 473 489 try_again: 474 - total_bytes = 0; 475 - 476 - WARN_ON_ONCE(destroy_list); 477 - 478 - spin_lock(&inode->i_lock); 479 - 480 490 /* 481 491 * A reference is taken only on the head request which acts as a 482 492 * reference to the whole page group - the group will not be destroyed 483 493 * until the head reference is released. 
484 494 */ 485 - head = nfs_page_find_head_request_locked(NFS_I(inode), page); 486 - 487 - if (!head) { 488 - spin_unlock(&inode->i_lock); 495 + head = nfs_page_find_head_request(page); 496 + if (!head) 489 497 return NULL; 498 + 499 + /* lock the page head first in order to avoid an ABBA inefficiency */ 500 + if (!nfs_lock_request(head)) { 501 + ret = nfs_wait_on_request(head); 502 + nfs_release_request(head); 503 + if (ret < 0) 504 + return ERR_PTR(ret); 505 + goto try_again; 490 506 } 491 507 492 - /* holding inode lock, so always make a non-blocking call to try the 493 - * page group lock */ 494 - ret = nfs_page_group_lock(head, true); 508 + /* Ensure that nobody removed the request before we locked it */ 509 + if (head != nfs_page_private_request(page) && !PageSwapCache(page)) { 510 + nfs_unlock_and_release_request(head); 511 + goto try_again; 512 + } 513 + 514 + ret = nfs_page_group_lock(head); 495 515 if (ret < 0) { 496 - spin_unlock(&inode->i_lock); 497 - 498 - if (!nonblock && ret == -EAGAIN) { 499 - nfs_page_group_lock_wait(head); 500 - nfs_release_request(head); 501 - goto try_again; 502 - } 503 - 504 - nfs_release_request(head); 516 + nfs_unlock_and_release_request(head); 505 517 return ERR_PTR(ret); 506 518 } 507 519 508 520 /* lock each request in the page group */ 509 - subreq = head; 510 - do { 521 + total_bytes = head->wb_bytes; 522 + for (subreq = head->wb_this_page; subreq != head; 523 + subreq = subreq->wb_this_page) { 524 + 525 + if (!kref_get_unless_zero(&subreq->wb_kref)) 526 + continue; 527 + while (!nfs_lock_request(subreq)) { 528 + /* 529 + * Unlock page to allow nfs_page_group_sync_on_bit() 530 + * to succeed 531 + */ 532 + nfs_page_group_unlock(head); 533 + ret = nfs_wait_on_request(subreq); 534 + if (!ret) 535 + ret = nfs_page_group_lock(head); 536 + if (ret < 0) { 537 + nfs_unroll_locks(inode, head, subreq); 538 + nfs_release_request(subreq); 539 + nfs_unlock_and_release_request(head); 540 + return ERR_PTR(ret); 541 + } 542 + } 511 543 /* 512 544 * Subrequests are always contiguous, non overlapping 513 545 * and in order - but may be repeated (mirrored writes). ··· 534 534 } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || 535 535 ((subreq->wb_offset + subreq->wb_bytes) > 536 536 (head->wb_offset + total_bytes)))) { 537 + nfs_unroll_locks(inode, head, subreq); 538 + nfs_unlock_and_release_request(subreq); 537 539 nfs_page_group_unlock(head); 538 - spin_unlock(&inode->i_lock); 540 + nfs_unlock_and_release_request(head); 539 541 return ERR_PTR(-EIO); 540 542 } 541 - 542 - if (!nfs_lock_request(subreq)) { 543 - /* releases page group bit lock and 544 - * inode spin lock and all references */ 545 - ret = nfs_unroll_locks_and_wait(inode, head, 546 - subreq, nonblock); 547 - 548 - if (ret == 0) 549 - goto try_again; 550 - 551 - return ERR_PTR(ret); 552 - } 553 - 554 - subreq = subreq->wb_this_page; 555 - } while (subreq != head); 543 + } 556 544 557 545 /* Now that all requests are locked, make sure they aren't on any list. 558 546 * Commit list removal accounting is done after locks are dropped */ ··· 561 573 head->wb_bytes = total_bytes; 562 574 } 563 575 564 - /* 565 - * prepare head request to be added to new pgio descriptor 566 - */ 567 - nfs_page_group_clear_bits(head); 568 - 569 - /* 570 - * some part of the group was still on the inode list - otherwise 571 - * the group wouldn't be involved in async write. 572 - * grab a reference for the head request, iff it needs one. 
573 - */ 574 - if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) 576 + /* Postpone destruction of this request */ 577 + if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { 578 + set_bit(PG_INODE_REF, &head->wb_flags); 575 579 kref_get(&head->wb_kref); 580 + atomic_long_inc(&NFS_I(inode)->nrequests); 581 + } 576 582 577 583 nfs_page_group_unlock(head); 578 584 579 - /* drop lock to clean uprequests on destroy list */ 580 - spin_unlock(&inode->i_lock); 585 + nfs_destroy_unlinked_subrequests(destroy_list, head, inode); 581 586 582 - nfs_destroy_unlinked_subrequests(destroy_list, head); 587 + /* Did we lose a race with nfs_inode_remove_request()? */ 588 + if (!(PagePrivate(page) || PageSwapCache(page))) { 589 + nfs_unlock_and_release_request(head); 590 + return NULL; 591 + } 583 592 584 - /* still holds ref on head from nfs_page_find_head_request_locked 593 + /* still holds ref on head from nfs_page_find_head_request 585 594 * and still has lock on head from lock loop */ 586 595 return head; 587 596 } 588 597 589 598 static void nfs_write_error_remove_page(struct nfs_page *req) 590 599 { 591 - nfs_unlock_request(req); 592 600 nfs_end_page_writeback(req); 593 601 generic_error_remove_page(page_file_mapping(req->wb_page), 594 602 req->wb_page); ··· 608 624 * May return an error if the user signalled nfs_wait_on_request(). 609 625 */ 610 626 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 611 - struct page *page, bool nonblock) 627 + struct page *page) 612 628 { 613 629 struct nfs_page *req; 614 630 int ret = 0; 615 631 616 - req = nfs_lock_and_join_requests(page, nonblock); 632 + req = nfs_lock_and_join_requests(page); 617 633 if (!req) 618 634 goto out; 619 635 ret = PTR_ERR(req); ··· 656 672 int ret; 657 673 658 674 nfs_pageio_cond_complete(pgio, page_index(page)); 659 - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); 675 + ret = nfs_page_async_flush(pgio, page); 660 676 if (ret == -EAGAIN) { 661 677 redirty_page_for_writepage(wbc, page); 662 678 ret = 0; ··· 743 759 */ 744 760 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 745 761 { 762 + struct address_space *mapping = page_file_mapping(req->wb_page); 746 763 struct nfs_inode *nfsi = NFS_I(inode); 747 764 748 765 WARN_ON_ONCE(req->wb_this_page != req); ··· 751 766 /* Lock the request! */ 752 767 nfs_lock_request(req); 753 768 754 - spin_lock(&inode->i_lock); 755 - if (!nfsi->nrequests && 756 - NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) 757 - inode->i_version++; 758 769 /* 759 770 * Swap-space should not get truncated. Hence no need to plug the race 760 771 * with invalidate/truncate. 761 772 */ 773 + spin_lock(&mapping->private_lock); 774 + if (!nfs_have_writebacks(inode) && 775 + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) { 776 + spin_lock(&inode->i_lock); 777 + inode->i_version++; 778 + spin_unlock(&inode->i_lock); 779 + } 762 780 if (likely(!PageSwapCache(req->wb_page))) { 763 781 set_bit(PG_MAPPED, &req->wb_flags); 764 782 SetPagePrivate(req->wb_page); 765 783 set_page_private(req->wb_page, (unsigned long)req); 766 784 } 767 - nfsi->nrequests++; 785 + spin_unlock(&mapping->private_lock); 786 + atomic_long_inc(&nfsi->nrequests); 768 787 /* this a head request for a page group - mark it as having an 769 788 * extra reference so sub groups can follow suit. 770 789 * This flag also informs pgio layer when to bump nrequests when 771 790 * adding subrequests. 
*/ 772 791 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); 773 792 kref_get(&req->wb_kref); 774 - spin_unlock(&inode->i_lock); 775 793 } 776 794 777 795 /* ··· 782 794 */ 783 795 static void nfs_inode_remove_request(struct nfs_page *req) 784 796 { 785 - struct inode *inode = d_inode(req->wb_context->dentry); 797 + struct address_space *mapping = page_file_mapping(req->wb_page); 798 + struct inode *inode = mapping->host; 786 799 struct nfs_inode *nfsi = NFS_I(inode); 787 800 struct nfs_page *head; 788 801 802 + atomic_long_dec(&nfsi->nrequests); 789 803 if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { 790 804 head = req->wb_head; 791 805 792 - spin_lock(&inode->i_lock); 806 + spin_lock(&mapping->private_lock); 793 807 if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { 794 808 set_page_private(head->wb_page, 0); 795 809 ClearPagePrivate(head->wb_page); 796 810 clear_bit(PG_MAPPED, &head->wb_flags); 797 811 } 798 - nfsi->nrequests--; 799 - spin_unlock(&inode->i_lock); 800 - } else { 801 - spin_lock(&inode->i_lock); 802 - nfsi->nrequests--; 803 - spin_unlock(&inode->i_lock); 812 + spin_unlock(&mapping->private_lock); 804 813 } 805 814 806 815 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) ··· 853 868 * number of outstanding requests requiring a commit as well as 854 869 * the MM page stats. 855 870 * 856 - * The caller must hold cinfo->inode->i_lock, and the nfs_page lock. 871 + * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the 872 + * nfs_page lock. 857 873 */ 858 874 void 859 875 nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, ··· 862 876 { 863 877 set_bit(PG_CLEAN, &req->wb_flags); 864 878 nfs_list_add_request(req, dst); 865 - cinfo->mds->ncommit++; 879 + atomic_long_inc(&cinfo->mds->ncommit); 866 880 } 867 881 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); 868 882 ··· 882 896 void 883 897 nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) 884 898 { 885 - spin_lock(&cinfo->inode->i_lock); 899 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 886 900 nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); 887 - spin_unlock(&cinfo->inode->i_lock); 901 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 888 902 if (req->wb_page) 889 903 nfs_mark_page_unstable(req->wb_page, cinfo); 890 904 } ··· 908 922 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 909 923 return; 910 924 nfs_list_remove_request(req); 911 - cinfo->mds->ncommit--; 925 + atomic_long_dec(&cinfo->mds->ncommit); 912 926 } 913 927 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 914 928 ··· 953 967 WB_RECLAIMABLE); 954 968 } 955 969 956 - /* Called holding inode (/cinfo) lock */ 970 + /* Called holding the request lock on @req */ 957 971 static void 958 972 nfs_clear_request_commit(struct nfs_page *req) 959 973 { ··· 962 976 struct nfs_commit_info cinfo; 963 977 964 978 nfs_init_cinfo_from_inode(&cinfo, inode); 979 + mutex_lock(&NFS_I(inode)->commit_mutex); 965 980 if (!pnfs_clear_request_commit(req, &cinfo)) { 966 981 nfs_request_remove_commit_list(req, &cinfo); 967 982 } 983 + mutex_unlock(&NFS_I(inode)->commit_mutex); 968 984 nfs_clear_page_commit(req->wb_page); 969 985 } 970 986 } ··· 1011 1023 remove_req: 1012 1024 nfs_inode_remove_request(req); 1013 1025 next: 1014 - nfs_unlock_request(req); 1015 1026 nfs_end_page_writeback(req); 1016 1027 nfs_release_request(req); 1017 1028 } ··· 1022 1035 unsigned long 1023 1036 nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 1024 1037 { 1025 - return 
cinfo->mds->ncommit; 1038 + return atomic_long_read(&cinfo->mds->ncommit); 1026 1039 } 1027 1040 1028 - /* cinfo->inode->i_lock held by caller */ 1041 + /* NFS_I(cinfo->inode)->commit_mutex held by caller */ 1029 1042 int 1030 1043 nfs_scan_commit_list(struct list_head *src, struct list_head *dst, 1031 1044 struct nfs_commit_info *cinfo, int max) 1032 1045 { 1033 - struct nfs_page *req, *tmp; 1046 + struct nfs_page *req; 1034 1047 int ret = 0; 1035 1048 1036 - list_for_each_entry_safe(req, tmp, src, wb_list) { 1037 - if (!nfs_lock_request(req)) 1038 - continue; 1049 + while(!list_empty(src)) { 1050 + req = list_first_entry(src, struct nfs_page, wb_list); 1039 1051 kref_get(&req->wb_kref); 1040 - if (cond_resched_lock(&cinfo->inode->i_lock)) 1041 - list_safe_reset_next(req, tmp, wb_list); 1052 + if (!nfs_lock_request(req)) { 1053 + int status; 1054 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 1055 + status = nfs_wait_on_request(req); 1056 + nfs_release_request(req); 1057 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 1058 + if (status < 0) 1059 + break; 1060 + continue; 1061 + } 1042 1062 nfs_request_remove_commit_list(req, cinfo); 1043 1063 nfs_list_add_request(req, dst); 1044 1064 ret++; 1045 1065 if ((ret == max) && !cinfo->dreq) 1046 1066 break; 1067 + cond_resched(); 1047 1068 } 1048 1069 return ret; 1049 1070 } ··· 1071 1076 { 1072 1077 int ret = 0; 1073 1078 1074 - spin_lock(&cinfo->inode->i_lock); 1075 - if (cinfo->mds->ncommit > 0) { 1079 + if (!atomic_long_read(&cinfo->mds->ncommit)) 1080 + return 0; 1081 + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 1082 + if (atomic_long_read(&cinfo->mds->ncommit) > 0) { 1076 1083 const int max = INT_MAX; 1077 1084 1078 1085 ret = nfs_scan_commit_list(&cinfo->mds->list, dst, 1079 1086 cinfo, max); 1080 1087 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); 1081 1088 } 1082 - spin_unlock(&cinfo->inode->i_lock); 1089 + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 1083 1090 return ret; 1084 1091 } 1085 1092 ··· 1102 1105 unsigned int end; 1103 1106 int error; 1104 1107 1105 - if (!PagePrivate(page)) 1106 - return NULL; 1107 - 1108 1108 end = offset + bytes; 1109 - spin_lock(&inode->i_lock); 1110 1109 1111 - for (;;) { 1112 - req = nfs_page_find_head_request_locked(NFS_I(inode), page); 1113 - if (req == NULL) 1114 - goto out_unlock; 1110 + req = nfs_lock_and_join_requests(page); 1111 + if (IS_ERR_OR_NULL(req)) 1112 + return req; 1115 1113 1116 - /* should be handled by nfs_flush_incompatible */ 1117 - WARN_ON_ONCE(req->wb_head != req); 1118 - WARN_ON_ONCE(req->wb_this_page != req); 1119 - 1120 - rqend = req->wb_offset + req->wb_bytes; 1121 - /* 1122 - * Tell the caller to flush out the request if 1123 - * the offsets are non-contiguous. 1124 - * Note: nfs_flush_incompatible() will already 1125 - * have flushed out requests having wrong owners. 1126 - */ 1127 - if (offset > rqend 1128 - || end < req->wb_offset) 1129 - goto out_flushme; 1130 - 1131 - if (nfs_lock_request(req)) 1132 - break; 1133 - 1134 - /* The request is locked, so wait and then retry */ 1135 - spin_unlock(&inode->i_lock); 1136 - error = nfs_wait_on_request(req); 1137 - nfs_release_request(req); 1138 - if (error != 0) 1139 - goto out_err; 1140 - spin_lock(&inode->i_lock); 1141 - } 1114 + rqend = req->wb_offset + req->wb_bytes; 1115 + /* 1116 + * Tell the caller to flush out the request if 1117 + * the offsets are non-contiguous. 1118 + * Note: nfs_flush_incompatible() will already 1119 + * have flushed out requests having wrong owners. 
1120 + */ 1121 + if (offset > rqend || end < req->wb_offset) 1122 + goto out_flushme; 1142 1123 1143 1124 /* Okay, the request matches. Update the region */ 1144 1125 if (offset < req->wb_offset) { ··· 1127 1152 req->wb_bytes = end - req->wb_offset; 1128 1153 else 1129 1154 req->wb_bytes = rqend - req->wb_offset; 1130 - out_unlock: 1131 - if (req) 1132 - nfs_clear_request_commit(req); 1133 - spin_unlock(&inode->i_lock); 1134 1155 return req; 1135 1156 out_flushme: 1136 - spin_unlock(&inode->i_lock); 1137 - nfs_release_request(req); 1157 + /* 1158 + * Note: we mark the request dirty here because 1159 + * nfs_lock_and_join_requests() cannot preserve 1160 + * commit flags, so we have to replay the write. 1161 + */ 1162 + nfs_mark_request_dirty(req); 1163 + nfs_unlock_and_release_request(req); 1138 1164 error = nfs_wb_page(inode, page); 1139 - out_err: 1140 - return ERR_PTR(error); 1165 + return (error < 0) ? ERR_PTR(error) : NULL; 1141 1166 } 1142 1167 1143 1168 /* ··· 1202 1227 l_ctx = req->wb_lock_context; 1203 1228 do_flush = req->wb_page != page || 1204 1229 !nfs_match_open_context(req->wb_context, ctx); 1205 - /* for now, flush if more than 1 request in page_group */ 1206 - do_flush |= req->wb_this_page != req; 1207 1230 if (l_ctx && flctx && 1208 1231 !(list_empty_careful(&flctx->flc_posix) && 1209 1232 list_empty_careful(&flctx->flc_flock))) { ··· 1385 1412 { 1386 1413 nfs_mark_request_dirty(req); 1387 1414 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); 1388 - nfs_unlock_request(req); 1389 1415 nfs_end_page_writeback(req); 1390 1416 nfs_release_request(req); 1391 1417 } ··· 1906 1934 int ret = 0; 1907 1935 1908 1936 /* no commits means nothing needs to be done */ 1909 - if (!nfsi->commit_info.ncommit) 1937 + if (!atomic_long_read(&nfsi->commit_info.ncommit)) 1910 1938 return ret; 1911 1939 1912 1940 if (wbc->sync_mode == WB_SYNC_NONE) { ··· 1987 2015 1988 2016 /* blocking call to cancel all requests and join to a single (head) 1989 2017 * request */ 1990 - req = nfs_lock_and_join_requests(page, false); 2018 + req = nfs_lock_and_join_requests(page); 1991 2019 1992 2020 if (IS_ERR(req)) { 1993 2021 ret = PTR_ERR(req);
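Two structural changes in fs/nfs/write.c above are worth calling out. The page-to-request lookup no longer involves inode->i_lock: nfs_page_find_private_request() reads page_private() under the address_space's private_lock and takes a kref before dropping it, so the request cannot vanish once the lock is released. nfs_lock_and_join_requests() is then rebuilt around locking the head request first and only afterwards walking and locking the subrequests, unrolling on contention. The lookup half — find under a short lock, take a reference, use the object outside the lock — looks roughly like this in userspace terms; the struct and function names are made up for the example.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct request {
        atomic_int refcount;
        /* ... payload ... */
};

struct page_slot {
        pthread_mutex_t private_lock;   /* analogue of mapping->private_lock */
        struct request *private;        /* analogue of page_private(page)    */
};

/* look the request up and take a reference before dropping the lock,
 * so it cannot be freed out from under the caller */
static struct request *find_request(struct page_slot *slot)
{
        struct request *req;

        pthread_mutex_lock(&slot->private_lock);
        req = slot->private;
        if (req)
                atomic_fetch_add(&req->refcount, 1);
        pthread_mutex_unlock(&slot->private_lock);
        return req;
}

static void put_request(struct request *req)
{
        if (atomic_fetch_sub(&req->refcount, 1) == 1)
                free(req);
}

int main(void)
{
        struct page_slot slot = { .private_lock = PTHREAD_MUTEX_INITIALIZER };
        struct request *req = calloc(1, sizeof(*req));

        if (!req)
                return 1;
        atomic_store(&req->refcount, 1);    /* reference held by the slot */
        slot.private = req;

        struct request *found = find_request(&slot);
        if (found)
                put_request(found);         /* drop the lookup reference */

        slot.private = NULL;
        put_request(req);                   /* drop the slot's reference */
        return 0;
}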
+3 -2
include/linux/nfs_fs.h
··· 154 154 */ 155 155 __be32 cookieverf[2]; 156 156 157 - unsigned long nrequests; 157 + atomic_long_t nrequests; 158 158 struct nfs_mds_commit_info commit_info; 159 159 160 160 /* Open contexts for shared mmap writes */ ··· 163 163 /* Readers: in-flight sillydelete RPC calls */ 164 164 /* Writers: rmdir */ 165 165 struct rw_semaphore rmdir_sem; 166 + struct mutex commit_mutex; 166 167 167 168 #if IS_ENABLED(CONFIG_NFS_V4) 168 169 struct nfs4_cached_acl *nfs4_acl; ··· 511 510 static inline int 512 511 nfs_have_writebacks(struct inode *inode) 513 512 { 514 - return NFS_I(inode)->nrequests != 0; 513 + return atomic_long_read(&NFS_I(inode)->nrequests) != 0; 515 514 } 516 515 517 516 /*
+1 -2
include/linux/nfs_page.h
··· 139 139 extern int nfs_wait_on_request(struct nfs_page *); 140 140 extern void nfs_unlock_request(struct nfs_page *req); 141 141 extern void nfs_unlock_and_release_request(struct nfs_page *); 142 - extern int nfs_page_group_lock(struct nfs_page *, bool); 143 - extern void nfs_page_group_lock_wait(struct nfs_page *); 142 + extern int nfs_page_group_lock(struct nfs_page *); 144 143 extern void nfs_page_group_unlock(struct nfs_page *); 145 144 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); 146 145 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
+1 -1
include/linux/nfs_xdr.h
··· 1476 1476 1477 1477 struct nfs_mds_commit_info { 1478 1478 atomic_t rpcs_out; 1479 - unsigned long ncommit; 1479 + atomic_long_t ncommit; 1480 1480 struct list_head list; 1481 1481 }; 1482 1482
+2
include/linux/sunrpc/sched.h
··· 139 139 #define RPC_TASK_RUNNING 0 140 140 #define RPC_TASK_QUEUED 1 141 141 #define RPC_TASK_ACTIVE 2 142 + #define RPC_TASK_MSG_RECV 3 143 + #define RPC_TASK_MSG_RECV_WAIT 4 142 144 143 145 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) 144 146 #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
+3
include/linux/sunrpc/xprt.h
··· 232 232 */ 233 233 spinlock_t transport_lock; /* lock transport info */ 234 234 spinlock_t reserve_lock; /* lock slot table */ 235 + spinlock_t recv_lock; /* lock receive list */ 235 236 u32 xid; /* Next XID value to use */ 236 237 struct rpc_task * snd_task; /* Task blocked in send */ 237 238 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ ··· 373 372 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); 374 373 struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); 375 374 void xprt_complete_rqst(struct rpc_task *task, int copied); 375 + void xprt_pin_rqst(struct rpc_rqst *req); 376 + void xprt_unpin_rqst(struct rpc_rqst *req); 376 377 void xprt_release_rqst_cong(struct rpc_task *task); 377 378 void xprt_disconnect_done(struct rpc_xprt *xprt); 378 379 void xprt_force_disconnect(struct rpc_xprt *xprt);
+2 -2
net/sunrpc/backchannel_rqst.c
··· 171 171 /* 172 172 * Add the temporary list to the backchannel preallocation list 173 173 */ 174 - spin_lock_bh(&xprt->bc_pa_lock); 174 + spin_lock(&xprt->bc_pa_lock); 175 175 list_splice(&tmp_list, &xprt->bc_pa_list); 176 176 xprt_inc_alloc_count(xprt, min_reqs); 177 - spin_unlock_bh(&xprt->bc_pa_lock); 177 + spin_unlock(&xprt->bc_pa_lock); 178 178 179 179 dprintk("RPC: setup backchannel transport done\n"); 180 180 return 0;
+3 -3
net/sunrpc/svcsock.c
··· 1001 1001 1002 1002 if (!bc_xprt) 1003 1003 return -EAGAIN; 1004 - spin_lock_bh(&bc_xprt->transport_lock); 1004 + spin_lock(&bc_xprt->recv_lock); 1005 1005 req = xprt_lookup_rqst(bc_xprt, xid); 1006 1006 if (!req) 1007 1007 goto unlock_notfound; ··· 1019 1019 memcpy(dst->iov_base, src->iov_base, src->iov_len); 1020 1020 xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); 1021 1021 rqstp->rq_arg.len = 0; 1022 - spin_unlock_bh(&bc_xprt->transport_lock); 1022 + spin_unlock(&bc_xprt->recv_lock); 1023 1023 return 0; 1024 1024 unlock_notfound: 1025 1025 printk(KERN_NOTICE ··· 1028 1028 __func__, ntohl(calldir), 1029 1029 bc_xprt, ntohl(xid)); 1030 1030 unlock_eagain: 1031 - spin_unlock_bh(&bc_xprt->transport_lock); 1031 + spin_unlock(&bc_xprt->recv_lock); 1032 1032 return -EAGAIN; 1033 1033 } 1034 1034
+51 -4
net/sunrpc/xprt.c
··· 844 844 } 845 845 EXPORT_SYMBOL_GPL(xprt_lookup_rqst); 846 846 847 + /** 848 + * xprt_pin_rqst - Pin a request on the transport receive list 849 + * @req: Request to pin 850 + * 851 + * Caller must ensure this is atomic with the call to xprt_lookup_rqst() 852 + * so should be holding the xprt transport lock. 853 + */ 854 + void xprt_pin_rqst(struct rpc_rqst *req) 855 + { 856 + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate); 857 + } 858 + 859 + /** 860 + * xprt_unpin_rqst - Unpin a request on the transport receive list 861 + * @req: Request to pin 862 + * 863 + * Caller should be holding the xprt transport lock. 864 + */ 865 + void xprt_unpin_rqst(struct rpc_rqst *req) 866 + { 867 + struct rpc_task *task = req->rq_task; 868 + 869 + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate); 870 + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate)) 871 + wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV); 872 + } 873 + 874 + static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) 875 + __must_hold(&req->rq_xprt->recv_lock) 876 + { 877 + struct rpc_task *task = req->rq_task; 878 + 879 + if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) { 880 + spin_unlock(&req->rq_xprt->recv_lock); 881 + set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); 882 + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV, 883 + TASK_UNINTERRUPTIBLE); 884 + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); 885 + spin_lock(&req->rq_xprt->recv_lock); 886 + } 887 + } 888 + 847 889 static void xprt_update_rtt(struct rpc_task *task) 848 890 { 849 891 struct rpc_rqst *req = task->tk_rqstp; ··· 1008 966 /* 1009 967 * Add to the list only if we're expecting a reply 1010 968 */ 1011 - spin_lock_bh(&xprt->transport_lock); 1012 969 /* Update the softirq receive buffer */ 1013 970 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1014 971 sizeof(req->rq_private_buf)); 1015 972 /* Add request to the receive list */ 973 + spin_lock(&xprt->recv_lock); 1016 974 list_add_tail(&req->rq_list, &xprt->recv); 1017 - spin_unlock_bh(&xprt->transport_lock); 975 + spin_unlock(&xprt->recv_lock); 1018 976 xprt_reset_majortimeo(req); 1019 977 /* Turn off autodisconnect */ 1020 978 del_singleshot_timer_sync(&xprt->timer); ··· 1329 1287 task->tk_ops->rpc_count_stats(task, task->tk_calldata); 1330 1288 else if (task->tk_client) 1331 1289 rpc_count_iostats(task, task->tk_client->cl_metrics); 1290 + spin_lock(&xprt->recv_lock); 1291 + if (!list_empty(&req->rq_list)) { 1292 + list_del(&req->rq_list); 1293 + xprt_wait_on_pinned_rqst(req); 1294 + } 1295 + spin_unlock(&xprt->recv_lock); 1332 1296 spin_lock_bh(&xprt->transport_lock); 1333 1297 xprt->ops->release_xprt(xprt, task); 1334 1298 if (xprt->ops->release_request) 1335 1299 xprt->ops->release_request(task); 1336 - if (!list_empty(&req->rq_list)) 1337 - list_del(&req->rq_list); 1338 1300 xprt->last_used = jiffies; 1339 1301 xprt_schedule_autodisconnect(xprt); 1340 1302 spin_unlock_bh(&xprt->transport_lock); ··· 1364 1318 1365 1319 spin_lock_init(&xprt->transport_lock); 1366 1320 spin_lock_init(&xprt->reserve_lock); 1321 + spin_lock_init(&xprt->recv_lock); 1367 1322 1368 1323 INIT_LIST_HEAD(&xprt->free); 1369 1324 INIT_LIST_HEAD(&xprt->recv);
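The new xprt_pin_rqst()/xprt_unpin_rqst() pair lets the receive path keep using an rpc_rqst after recv_lock is dropped: the receiver sets RPC_TASK_MSG_RECV while it copies data, and xprt_release(), via xprt_wait_on_pinned_rqst(), drops recv_lock, sleeps until the pin goes away, and retakes the lock before tearing the request down. Below is a compact userspace analogue built on a pinned flag plus a condition variable (pthread_cond_wait() drops the lock while sleeping, much as the kernel version unlocks explicitly around wait_on_bit()); the names are illustrative only.

#include <pthread.h>
#include <stdbool.h>

struct rqst {
        bool pinned;                 /* analogue of RPC_TASK_MSG_RECV */
        pthread_mutex_t recv_lock;   /* analogue of xprt->recv_lock   */
        pthread_cond_t unpinned;
};

/* called with recv_lock held, right after looking the request up */
static void pin_rqst(struct rqst *req)
{
        req->pinned = true;
}

/* called with recv_lock held once the receiver is done with the request */
static void unpin_rqst(struct rqst *req)
{
        req->pinned = false;
        pthread_cond_broadcast(&req->unpinned);
}

/* called with recv_lock held by the side tearing the request down;
 * pthread_cond_wait() releases recv_lock while we sleep */
static void wait_on_pinned_rqst(struct rqst *req)
{
        while (req->pinned)
                pthread_cond_wait(&req->unpinned, &req->recv_lock);
}

/* receiver: pin under the lock, copy the payload with the lock dropped,
 * then relock and unpin */
static void receive(struct rqst *req, const char *data)
{
        pthread_mutex_lock(&req->recv_lock);
        pin_rqst(req);
        pthread_mutex_unlock(&req->recv_lock);

        (void)data;                  /* the copy happens without recv_lock */

        pthread_mutex_lock(&req->recv_lock);
        unpin_rqst(req);
        pthread_mutex_unlock(&req->recv_lock);
}

int main(void)
{
        struct rqst req = {
                .recv_lock = PTHREAD_MUTEX_INITIALIZER,
                .unpinned  = PTHREAD_COND_INITIALIZER,
        };

        receive(&req, "payload");

        pthread_mutex_lock(&req.recv_lock);
        wait_on_pinned_rqst(&req);   /* returns at once: nothing is pinned */
        pthread_mutex_unlock(&req.recv_lock);
        return 0;
}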
+4 -4
net/sunrpc/xprtrdma/rpc_rdma.c
··· 1051 1051 * RPC completion while holding the transport lock to ensure 1052 1052 * the rep, rqst, and rq_task pointers remain stable. 1053 1053 */ 1054 - spin_lock_bh(&xprt->transport_lock); 1054 + spin_lock(&xprt->recv_lock); 1055 1055 rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); 1056 1056 if (!rqst) 1057 1057 goto out_norqst; ··· 1136 1136 xprt_release_rqst_cong(rqst->rq_task); 1137 1137 1138 1138 xprt_complete_rqst(rqst->rq_task, status); 1139 - spin_unlock_bh(&xprt->transport_lock); 1139 + spin_unlock(&xprt->recv_lock); 1140 1140 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", 1141 1141 __func__, xprt, rqst, status); 1142 1142 return; ··· 1187 1187 r_xprt->rx_stats.bad_reply_count++; 1188 1188 goto out; 1189 1189 1190 - /* The req was still available, but by the time the transport_lock 1190 + /* The req was still available, but by the time the recv_lock 1191 1191 * was acquired, the rqst and task had been released. Thus the RPC 1192 1192 * has already been terminated. 1193 1193 */ 1194 1194 out_norqst: 1195 - spin_unlock_bh(&xprt->transport_lock); 1195 + spin_unlock(&xprt->recv_lock); 1196 1196 rpcrdma_buffer_put(req); 1197 1197 dprintk("RPC: %s: race, no rqst left for req %p\n", 1198 1198 __func__, req);
+5 -2
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 52 52 if (src->iov_len < 24) 53 53 goto out_shortreply; 54 54 55 - spin_lock_bh(&xprt->transport_lock); 55 + spin_lock(&xprt->recv_lock); 56 56 req = xprt_lookup_rqst(xprt, xid); 57 57 if (!req) 58 58 goto out_notfound; ··· 69 69 else if (credits > r_xprt->rx_buf.rb_bc_max_requests) 70 70 credits = r_xprt->rx_buf.rb_bc_max_requests; 71 71 72 + spin_lock_bh(&xprt->transport_lock); 72 73 cwnd = xprt->cwnd; 73 74 xprt->cwnd = credits << RPC_CWNDSHIFT; 74 75 if (xprt->cwnd > cwnd) 75 76 xprt_release_rqst_cong(req->rq_task); 77 + spin_unlock_bh(&xprt->transport_lock); 78 + 76 79 77 80 ret = 0; 78 81 xprt_complete_rqst(req->rq_task, rcvbuf->len); 79 82 rcvbuf->len = 0; 80 83 81 84 out_unlock: 82 - spin_unlock_bh(&xprt->transport_lock); 85 + spin_unlock(&xprt->recv_lock); 83 86 out: 84 87 return ret; 85 88
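With recv_lock in place, the RDMA backchannel reply path looks the request up and completes it entirely under recv_lock, and takes transport_lock only briefly, nested inside, around the congestion-window update. The resulting lock ordering — recv_lock outermost, transport_lock innermost and short-lived — can be shown with a trivial sketch; the fields are invented for illustration.

#include <pthread.h>
#include <stdio.h>

struct transport {
        pthread_mutex_t recv_lock;       /* protects lookup/completion state */
        pthread_mutex_t transport_lock;  /* protects congestion-window state */
        int replies;
        int cwnd;
};

/* lock ordering mirrors the hunk above: recv_lock outermost, transport_lock
 * taken only briefly, nested inside, for the cwnd update */
static void complete_reply(struct transport *xprt, int credits)
{
        pthread_mutex_lock(&xprt->recv_lock);
        xprt->replies++;

        pthread_mutex_lock(&xprt->transport_lock);
        xprt->cwnd = credits;
        pthread_mutex_unlock(&xprt->transport_lock);

        pthread_mutex_unlock(&xprt->recv_lock);
}

int main(void)
{
        struct transport xprt = {
                .recv_lock      = PTHREAD_MUTEX_INITIALIZER,
                .transport_lock = PTHREAD_MUTEX_INITIALIZER,
        };

        complete_reply(&xprt, 8);
        printf("replies=%d cwnd=%d\n", xprt.replies, xprt.cwnd);
        return 0;
}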
+44 -40
net/sunrpc/xprtsock.c
··· 969 969 return; 970 970 971 971 /* Look up and lock the request corresponding to the given XID */ 972 - spin_lock_bh(&xprt->transport_lock); 972 + spin_lock(&xprt->recv_lock); 973 973 rovr = xprt_lookup_rqst(xprt, *xp); 974 974 if (!rovr) 975 975 goto out_unlock; 976 + xprt_pin_rqst(rovr); 977 + spin_unlock(&xprt->recv_lock); 976 978 task = rovr->rq_task; 977 979 978 980 copied = rovr->rq_private_buf.buflen; ··· 983 981 984 982 if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { 985 983 dprintk("RPC: sk_buff copy failed\n"); 986 - goto out_unlock; 984 + spin_lock(&xprt->recv_lock); 985 + goto out_unpin; 987 986 } 988 987 988 + spin_lock(&xprt->recv_lock); 989 989 xprt_complete_rqst(task, copied); 990 - 990 + out_unpin: 991 + xprt_unpin_rqst(rovr); 991 992 out_unlock: 992 - spin_unlock_bh(&xprt->transport_lock); 993 + spin_unlock(&xprt->recv_lock); 993 994 } 994 995 995 996 static void xs_local_data_receive(struct sock_xprt *transport) ··· 1055 1050 return; 1056 1051 1057 1052 /* Look up and lock the request corresponding to the given XID */ 1058 - spin_lock_bh(&xprt->transport_lock); 1053 + spin_lock(&xprt->recv_lock); 1059 1054 rovr = xprt_lookup_rqst(xprt, *xp); 1060 1055 if (!rovr) 1061 1056 goto out_unlock; 1057 + xprt_pin_rqst(rovr); 1058 + spin_unlock(&xprt->recv_lock); 1062 1059 task = rovr->rq_task; 1063 1060 1064 1061 if ((copied = rovr->rq_private_buf.buflen) > repsize) ··· 1069 1062 /* Suck it into the iovec, verify checksum if not done by hw. */ 1070 1063 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1071 1064 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); 1072 - goto out_unlock; 1065 + spin_lock(&xprt->recv_lock); 1066 + goto out_unpin; 1073 1067 } 1074 1068 1075 1069 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); 1076 1070 1071 + spin_lock_bh(&xprt->transport_lock); 1077 1072 xprt_adjust_cwnd(xprt, task, copied); 1078 - xprt_complete_rqst(task, copied); 1079 - 1080 - out_unlock: 1081 1073 spin_unlock_bh(&xprt->transport_lock); 1074 + spin_lock(&xprt->recv_lock); 1075 + xprt_complete_rqst(task, copied); 1076 + out_unpin: 1077 + xprt_unpin_rqst(rovr); 1078 + out_unlock: 1079 + spin_unlock(&xprt->recv_lock); 1082 1080 } 1083 1081 1084 1082 static void xs_udp_data_receive(struct sock_xprt *transport) ··· 1289 1277 } 1290 1278 1291 1279 len = desc->count; 1292 - if (len > transport->tcp_reclen - transport->tcp_offset) { 1293 - struct xdr_skb_reader my_desc; 1294 - 1295 - len = transport->tcp_reclen - transport->tcp_offset; 1296 - memcpy(&my_desc, desc, sizeof(my_desc)); 1297 - my_desc.count = len; 1298 - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1299 - &my_desc, xdr_skb_read_bits); 1300 - desc->count -= r; 1301 - desc->offset += r; 1302 - } else 1303 - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1280 + if (len > transport->tcp_reclen - transport->tcp_offset) 1281 + desc->count = transport->tcp_reclen - transport->tcp_offset; 1282 + r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1304 1283 desc, xdr_skb_read_bits); 1305 1284 1306 - if (r > 0) { 1307 - transport->tcp_copied += r; 1308 - transport->tcp_offset += r; 1309 - } 1310 - if (r != len) { 1285 + if (desc->count) { 1311 1286 /* Error when copying to the receive buffer, 1312 1287 * usually because we weren't able to allocate 1313 1288 * additional buffer pages. 
All we can do now ··· 1313 1314 transport->tcp_offset, transport->tcp_reclen); 1314 1315 return; 1315 1316 } 1317 + 1318 + transport->tcp_copied += r; 1319 + transport->tcp_offset += r; 1320 + desc->count = len - r; 1316 1321 1317 1322 dprintk("RPC: XID %08x read %zd bytes\n", 1318 1323 ntohl(transport->tcp_xid), r); ··· 1346 1343 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); 1347 1344 1348 1345 /* Find and lock the request corresponding to this xid */ 1349 - spin_lock_bh(&xprt->transport_lock); 1346 + spin_lock(&xprt->recv_lock); 1350 1347 req = xprt_lookup_rqst(xprt, transport->tcp_xid); 1351 1348 if (!req) { 1352 1349 dprintk("RPC: XID %08x request not found!\n", 1353 1350 ntohl(transport->tcp_xid)); 1354 - spin_unlock_bh(&xprt->transport_lock); 1351 + spin_unlock(&xprt->recv_lock); 1355 1352 return -1; 1356 1353 } 1354 + xprt_pin_rqst(req); 1355 + spin_unlock(&xprt->recv_lock); 1357 1356 1358 1357 xs_tcp_read_common(xprt, desc, req); 1359 1358 1359 + spin_lock(&xprt->recv_lock); 1360 1360 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1361 1361 xprt_complete_rqst(req->rq_task, transport->tcp_copied); 1362 - 1363 - spin_unlock_bh(&xprt->transport_lock); 1362 + xprt_unpin_rqst(req); 1363 + spin_unlock(&xprt->recv_lock); 1364 1364 return 0; 1365 1365 } 1366 1366 ··· 1382 1376 container_of(xprt, struct sock_xprt, xprt); 1383 1377 struct rpc_rqst *req; 1384 1378 1385 - /* Look up and lock the request corresponding to the given XID */ 1386 - spin_lock_bh(&xprt->transport_lock); 1379 + /* Look up the request corresponding to the given XID */ 1387 1380 req = xprt_lookup_bc_request(xprt, transport->tcp_xid); 1388 1381 if (req == NULL) { 1389 - spin_unlock_bh(&xprt->transport_lock); 1390 1382 printk(KERN_WARNING "Callback slot table overflowed\n"); 1391 1383 xprt_force_disconnect(xprt); 1392 1384 return -1; ··· 1395 1391 1396 1392 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1397 1393 xprt_complete_bc_request(req, transport->tcp_copied); 1398 - spin_unlock_bh(&xprt->transport_lock); 1399 1394 1400 1395 return 0; 1401 1396 } ··· 1519 1516 .arg.data = xprt, 1520 1517 }; 1521 1518 unsigned long total = 0; 1519 + int loop; 1522 1520 int read = 0; 1523 1521 1524 1522 mutex_lock(&transport->recv_mutex); ··· 1528 1524 goto out; 1529 1525 1530 1526 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1531 - for (;;) { 1527 + for (loop = 0; loop < 64; loop++) { 1532 1528 lock_sock(sk); 1533 1529 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); 1534 1530 if (read <= 0) { 1535 1531 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); 1536 1532 release_sock(sk); 1537 - if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1538 - break; 1539 - } else { 1540 - release_sock(sk); 1541 - total += read; 1533 + break; 1542 1534 } 1535 + release_sock(sk); 1536 + total += read; 1543 1537 rd_desc.count = 65536; 1544 1538 } 1539 + if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1540 + queue_work(xprtiod_workqueue, &transport->recv_worker); 1545 1541 out: 1546 1542 mutex_unlock(&transport->recv_mutex); 1547 1543 trace_xs_tcp_data_ready(xprt, read, total);
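Finally, xs_tcp_data_receive()'s receive loop is now bounded to 64 passes of tcp_read_sock(); if XPRT_SOCK_DATA_READY is still set afterwards, the worker requeues itself on xprtiod_workqueue rather than monopolizing the workqueue thread. The same "process a bounded batch, then reschedule yourself if there is more" structure, reduced to a userspace sketch where the workqueue is simulated by the loop in main() and all names are invented:

#include <stdbool.h>
#include <stdio.h>

#define BATCH_LIMIT 64

static int items_ready = 150;   /* pretend this many buffers are queued */

/* consume one pending item; returns false when nothing was ready */
static bool read_one(void)
{
        if (items_ready == 0)
                return false;
        items_ready--;
        return true;
}

/* One invocation of the "work item": process a bounded batch, and report
 * whether it should be queued again (the kernel calls queue_work() here). */
static bool receive_work(void)
{
        int loop;

        for (loop = 0; loop < BATCH_LIMIT; loop++) {
                if (!read_one())
                        return false;   /* drained: no need to requeue */
        }
        return items_ready > 0;         /* hit the batch limit with data left */
}

int main(void)
{
        int requeues = 0;

        /* stand-in for the workqueue re-running the work item until it stops requeueing */
        while (receive_work())
                requeues++;
        printf("work item requeued %d time(s)\n", requeues);
        return 0;
}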