···1285128512861286static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)12871287{12881288- struct nfs_inode *nfsi = NFS_I(inode);12891288 unsigned long ret = 0;1290128912911290 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)···13141315 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)13151316 && (fattr->valid & NFS_ATTR_FATTR_SIZE)13161317 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)13171317- && nfsi->nrequests == 0) {13181318+ && !nfs_have_writebacks(inode)) {13181319 i_size_write(inode, nfs_size_to_loff_t(fattr->size));13191320 ret |= NFS_INO_INVALID_ATTR;13201321 }···18221823 if (new_isize != cur_isize) {18231824 /* Do we perhaps have any outstanding writes, or has18241825 * the file grown beyond our last write? */18251825- if (nfsi->nrequests == 0 || new_isize > cur_isize) {18261826+ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) {18261827 i_size_write(inode, new_isize);18271828 if (!have_writers)18281829 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;···20112012 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);20122013 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);20132014 INIT_LIST_HEAD(&nfsi->commit_info.list);20142014- nfsi->nrequests = 0;20152015- nfsi->commit_info.ncommit = 0;20152015+ atomic_long_set(&nfsi->nrequests, 0);20162016+ atomic_long_set(&nfsi->commit_info.ncommit, 0);20162017 atomic_set(&nfsi->commit_info.rpcs_out, 0);20172018 init_rwsem(&nfsi->rmdir_sem);20192019+ mutex_init(&nfsi->commit_mutex);20182020 nfs4_init_once(nfsi);20192021}20202022
+19-48
fs/nfs/pagelist.c
···134134/*135135 * nfs_page_group_lock - lock the head of the page group136136 * @req - request in group that is to be locked137137- * @nonblock - if true don't block waiting for lock138137 *139139- * this lock must be held if modifying the page group list138138+ * this lock must be held when traversing or modifying the page139139+ * group list140140 *141141- * return 0 on success, < 0 on error: -EDELAY if nonblocking or the142142- * result from wait_on_bit_lock143143- *144144- * NOTE: calling with nonblock=false should always have set the145145- * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock146146- * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.141141+ * return 0 on success, < 0 on error147142 */148143int149149-nfs_page_group_lock(struct nfs_page *req, bool nonblock)144144+nfs_page_group_lock(struct nfs_page *req)150145{151146 struct nfs_page *head = req->wb_head;152147···150155 if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))151156 return 0;152157153153- if (!nonblock) {154154- set_bit(PG_CONTENDED1, &head->wb_flags);155155- smp_mb__after_atomic();156156- return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,157157- TASK_UNINTERRUPTIBLE);158158- }159159-160160- return -EAGAIN;161161-}162162-163163-/*164164- * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it165165- * @req - a request in the group166166- *167167- * This is a blocking call to wait for the group lock to be cleared.168168- */169169-void170170-nfs_page_group_lock_wait(struct nfs_page *req)171171-{172172- struct nfs_page *head = req->wb_head;173173-174174- WARN_ON_ONCE(head != head->wb_head);175175-176176- if (!test_bit(PG_HEADLOCK, &head->wb_flags))177177- return;178158 set_bit(PG_CONTENDED1, &head->wb_flags);179159 smp_mb__after_atomic();180180- wait_on_bit(&head->wb_flags, PG_HEADLOCK,181181- TASK_UNINTERRUPTIBLE);160160+ return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,161161+ TASK_UNINTERRUPTIBLE);182162}183163184164/*···216246{217247 bool ret;218248219219- nfs_page_group_lock(req, false);249249+ nfs_page_group_lock(req);220250 ret = nfs_page_group_sync_on_bit_locked(req, bit);221251 nfs_page_group_unlock(req);222252···258288 inode = page_file_mapping(req->wb_page)->host;259289 set_bit(PG_INODE_REF, &req->wb_flags);260290 kref_get(&req->wb_kref);261261- spin_lock(&inode->i_lock);262262- NFS_I(inode)->nrequests++;263263- spin_unlock(&inode->i_lock);291291+ atomic_long_inc(&NFS_I(inode)->nrequests);264292 }265293 }266294}···274306nfs_page_group_destroy(struct kref *kref)275307{276308 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);309309+ struct nfs_page *head = req->wb_head;277310 struct nfs_page *tmp, *next;278311279279- /* subrequests must release the ref on the head request */280280- if (req->wb_head != req)281281- nfs_release_request(req->wb_head);282282-283312 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))284284- return;313313+ goto out;285314286315 tmp = req;287316 do {···289324 nfs_free_request(tmp);290325 tmp = next;291326 } while (tmp != req);327327+out:328328+ /* subrequests must release the ref on the head request */329329+ if (head != req)330330+ nfs_release_request(head);292331}293332294333/**···434465{435466 kref_put(&req->wb_kref, nfs_page_group_destroy);436467}468468+EXPORT_SYMBOL_GPL(nfs_release_request);437469438470/**439471 * nfs_wait_on_request - Wait for a request to complete.···453483 return wait_on_bit_io(&req->wb_flags, PG_BUSY,454484 TASK_UNINTERRUPTIBLE);455485}486486+EXPORT_SYMBOL_GPL(nfs_wait_on_request);456487457488/*458489 * nfs_generic_pg_test - determine if requests can be coalesced···10071036 unsigned int bytes_left = 0;10081037 unsigned int offset, pgbase;1009103810101010- nfs_page_group_lock(req, false);10391039+ nfs_page_group_lock(req);1011104010121041 subreq = req;10131042 bytes_left = subreq->wb_bytes;···10291058 if (mirror->pg_recoalesce)10301059 return 0;10311060 /* retry add_request for this subreq */10321032- nfs_page_group_lock(req, false);10611061+ nfs_page_group_lock(req);10331062 continue;10341063 }10351064···1126115511271156 for (midx = 0; midx < desc->pg_mirror_count; midx++) {11281157 if (midx) {11291129- nfs_page_group_lock(req, false);11581158+ nfs_page_group_lock(req);1130115911311160 /* find the last request */11321161 for (lastreq = req->wb_head;
···8383 }8484out:8585 nfs_request_remove_commit_list(req, cinfo);8686- pnfs_put_lseg_locked(freeme);8686+ pnfs_put_lseg(freeme);8787}8888EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);8989···9191pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst,9292 struct nfs_commit_info *cinfo, int max)9393{9494- struct nfs_page *req, *tmp;9494+ struct nfs_page *req;9595 int ret = 0;96969797- list_for_each_entry_safe(req, tmp, src, wb_list) {9898- if (!nfs_lock_request(req))9999- continue;9797+ while(!list_empty(src)) {9898+ req = list_first_entry(src, struct nfs_page, wb_list);9999+100100 kref_get(&req->wb_kref);101101- if (cond_resched_lock(&cinfo->inode->i_lock))102102- list_safe_reset_next(req, tmp, wb_list);101101+ if (!nfs_lock_request(req)) {102102+ int status;103103+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);104104+ status = nfs_wait_on_request(req);105105+ nfs_release_request(req);106106+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);107107+ if (status < 0)108108+ break;109109+ continue;110110+ }103111 nfs_request_remove_commit_list(req, cinfo);104112 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);105113 nfs_list_add_request(req, dst);106114 ret++;107115 if ((ret == max) && !cinfo->dreq)108116 break;117117+ cond_resched();109118 }110119 return ret;111120}···128119 struct list_head *dst = &bucket->committing;129120 int ret;130121131131- lockdep_assert_held(&cinfo->inode->i_lock);122122+ lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);132123 ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max);133124 if (ret) {134125 cinfo->ds->nwritten -= ret;···136127 if (bucket->clseg == NULL)137128 bucket->clseg = pnfs_get_lseg(bucket->wlseg);138129 if (list_empty(src)) {139139- pnfs_put_lseg_locked(bucket->wlseg);130130+ pnfs_put_lseg(bucket->wlseg);140131 bucket->wlseg = NULL;141132 }142133 }···151142{152143 int i, rv = 0, cnt;153144154154- lockdep_assert_held(&cinfo->inode->i_lock);145145+ lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);155146 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {156147 cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],157148 cinfo, max);···171162 int nwritten;172163 int i;173164174174- lockdep_assert_held(&cinfo->inode->i_lock);165165+ lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);175166restart:176167 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {177168 nwritten = pnfs_generic_transfer_commit_list(&b->written,···962953 struct list_head *list;963954 struct pnfs_commit_bucket *buckets;964955965965- spin_lock(&cinfo->inode->i_lock);956956+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);966957 buckets = cinfo->ds->buckets;967958 list = &buckets[ds_commit_idx].written;968959 if (list_empty(list)) {969960 if (!pnfs_is_valid_lseg(lseg)) {970970- spin_unlock(&cinfo->inode->i_lock);961961+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);971962 cinfo->completion_ops->resched_write(cinfo, req);972963 return;973964 }···984975 cinfo->ds->nwritten++;985976986977 nfs_request_add_commit_list_locked(req, list, cinfo);987987- spin_unlock(&cinfo->inode->i_lock);978978+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);988979 nfs_mark_page_unstable(req->wb_page, cinfo);989980}990981EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
+208-236
fs/nfs/write.c
···154154 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);155155}156156157157+static struct nfs_page *158158+nfs_page_private_request(struct page *page)159159+{160160+ if (!PagePrivate(page))161161+ return NULL;162162+ return (struct nfs_page *)page_private(page);163163+}164164+157165/*158166 * nfs_page_find_head_request_locked - find head request associated with @page159167 *···170162 * returns matching head request with reference held, or NULL if not found.171163 */172164static struct nfs_page *173173-nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)165165+nfs_page_find_private_request(struct page *page)174166{175175- struct nfs_page *req = NULL;167167+ struct address_space *mapping = page_file_mapping(page);168168+ struct nfs_page *req;176169177177- if (PagePrivate(page))178178- req = (struct nfs_page *)page_private(page);179179- else if (unlikely(PageSwapCache(page)))180180- req = nfs_page_search_commits_for_head_request_locked(nfsi,181181- page);182182-170170+ if (!PagePrivate(page))171171+ return NULL;172172+ spin_lock(&mapping->private_lock);173173+ req = nfs_page_private_request(page);183174 if (req) {184175 WARN_ON_ONCE(req->wb_head != req);185176 kref_get(&req->wb_kref);186177 }178178+ spin_unlock(&mapping->private_lock);179179+ return req;180180+}187181182182+static struct nfs_page *183183+nfs_page_find_swap_request(struct page *page)184184+{185185+ struct inode *inode = page_file_mapping(page)->host;186186+ struct nfs_inode *nfsi = NFS_I(inode);187187+ struct nfs_page *req = NULL;188188+ if (!PageSwapCache(page))189189+ return NULL;190190+ mutex_lock(&nfsi->commit_mutex);191191+ if (PageSwapCache(page)) {192192+ req = nfs_page_search_commits_for_head_request_locked(nfsi,193193+ page);194194+ if (req) {195195+ WARN_ON_ONCE(req->wb_head != req);196196+ kref_get(&req->wb_kref);197197+ }198198+ }199199+ mutex_unlock(&nfsi->commit_mutex);188200 return req;189201}190202···215187 */216188static struct nfs_page *nfs_page_find_head_request(struct page *page)217189{218218- struct inode *inode = page_file_mapping(page)->host;219219- struct nfs_page *req = NULL;190190+ struct nfs_page *req;220191221221- spin_lock(&inode->i_lock);222222- req = nfs_page_find_head_request_locked(NFS_I(inode), page);223223- spin_unlock(&inode->i_lock);192192+ req = nfs_page_find_private_request(page);193193+ if (!req)194194+ req = nfs_page_find_swap_request(page);224195 return req;225196}226197···268241{269242 struct nfs_page *req;270243271271- WARN_ON_ONCE(head != head->wb_head);272272- WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));273273-274244 req = head;275245 do {276246 if (page_offset >= req->wb_pgbase &&···293269 unsigned int pos = 0;294270 unsigned int len = nfs_page_length(req->wb_page);295271296296- nfs_page_group_lock(req, false);272272+ nfs_page_group_lock(req);297273298298- do {274274+ for (;;) {299275 tmp = nfs_page_group_search_locked(req->wb_head, pos);300300- if (tmp) {301301- /* no way this should happen */302302- WARN_ON_ONCE(tmp->wb_pgbase != pos);303303- pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);304304- }305305- } while (tmp && pos < len);276276+ if (!tmp)277277+ break;278278+ pos = tmp->wb_pgbase + tmp->wb_bytes;279279+ }306280307281 nfs_page_group_unlock(req);308308- WARN_ON_ONCE(pos > len);309309- return pos == len;282282+ return pos >= len;310283}311284312285/* We can set the PG_uptodate flag if we see that a write request···354333{355334 struct inode *inode = page_file_mapping(req->wb_page)->host;356335 struct nfs_server *nfss = NFS_SERVER(inode);336336+ bool is_done;357337358358- if (!nfs_page_group_sync_on_bit(req, PG_WB_END))338338+ is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);339339+ nfs_unlock_request(req);340340+ if (!is_done)359341 return;360342361343 end_page_writeback(req->wb_page);362344 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)363345 clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);364346}365365-366366-367367-/* nfs_page_group_clear_bits368368- * @req - an nfs request369369- * clears all page group related bits from @req370370- */371371-static void372372-nfs_page_group_clear_bits(struct nfs_page *req)373373-{374374- clear_bit(PG_TEARDOWN, &req->wb_flags);375375- clear_bit(PG_UNLOCKPAGE, &req->wb_flags);376376- clear_bit(PG_UPTODATE, &req->wb_flags);377377- clear_bit(PG_WB_END, &req->wb_flags);378378- clear_bit(PG_REMOVE, &req->wb_flags);379379-}380380-381347382348/*383349 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req···374366 * @inode - inode associated with request page group, must be holding inode lock375367 * @head - head request of page group, must be holding head lock376368 * @req - request that couldn't lock and needs to wait on the req bit lock377377- * @nonblock - if true, don't actually wait378369 *379379- * NOTE: this must be called holding page_group bit lock and inode spin lock380380- * and BOTH will be released before returning.370370+ * NOTE: this must be called holding page_group bit lock371371+ * which will be released before returning.381372 *382373 * returns 0 on success, < 0 on error.383374 */384384-static int385385-nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,386386- struct nfs_page *req, bool nonblock)387387- __releases(&inode->i_lock)375375+static void376376+nfs_unroll_locks(struct inode *inode, struct nfs_page *head,377377+ struct nfs_page *req)388378{389379 struct nfs_page *tmp;390390- int ret;391380392381 /* relinquish all the locks successfully grabbed this run */393393- for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)394394- nfs_unlock_request(tmp);395395-396396- WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));397397-398398- /* grab a ref on the request that will be waited on */399399- kref_get(&req->wb_kref);400400-401401- nfs_page_group_unlock(head);402402- spin_unlock(&inode->i_lock);403403-404404- /* release ref from nfs_page_find_head_request_locked */405405- nfs_release_request(head);406406-407407- if (!nonblock)408408- ret = nfs_wait_on_request(req);409409- else410410- ret = -EAGAIN;411411- nfs_release_request(req);412412-413413- return ret;382382+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {383383+ if (!kref_read(&tmp->wb_kref))384384+ continue;385385+ nfs_unlock_and_release_request(tmp);386386+ }414387}415388416389/*···406417 */407418static void408419nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,409409- struct nfs_page *old_head)420420+ struct nfs_page *old_head,421421+ struct inode *inode)410422{411423 while (destroy_list) {412424 struct nfs_page *subreq = destroy_list;···418428 WARN_ON_ONCE(old_head != subreq->wb_head);419429420430 /* make sure old group is not used */421421- subreq->wb_head = subreq;422431 subreq->wb_this_page = subreq;432432+433433+ clear_bit(PG_REMOVE, &subreq->wb_flags);434434+435435+ /* Note: races with nfs_page_group_destroy() */436436+ if (!kref_read(&subreq->wb_kref)) {437437+ /* Check if we raced with nfs_page_group_destroy() */438438+ if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))439439+ nfs_free_request(subreq);440440+ continue;441441+ }442442+443443+ subreq->wb_head = subreq;444444+445445+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {446446+ nfs_release_request(subreq);447447+ atomic_long_dec(&NFS_I(inode)->nrequests);448448+ }423449424450 /* subreq is now totally disconnected from page group or any425451 * write / commit lists. last chance to wake any waiters */426426- nfs_unlock_request(subreq);427427-428428- if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {429429- /* release ref on old head request */430430- nfs_release_request(old_head);431431-432432- nfs_page_group_clear_bits(subreq);433433-434434- /* release the PG_INODE_REF reference */435435- if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))436436- nfs_release_request(subreq);437437- else438438- WARN_ON_ONCE(1);439439- } else {440440- WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));441441- /* zombie requests have already released the last442442- * reference and were waiting on the rest of the443443- * group to complete. Since it's no longer part of a444444- * group, simply free the request */445445- nfs_page_group_clear_bits(subreq);446446- nfs_free_request(subreq);447447- }452452+ nfs_unlock_and_release_request(subreq);448453 }449454}450455···449464 * operations for this page.450465 *451466 * @page - the page used to lookup the "page group" of nfs_page structures452452- * @nonblock - if true, don't block waiting for request locks453467 *454468 * This function joins all sub requests to the head request by first455469 * locking all requests in the group, cancelling any pending operations···462478 * error was encountered.463479 */464480static struct nfs_page *465465-nfs_lock_and_join_requests(struct page *page, bool nonblock)481481+nfs_lock_and_join_requests(struct page *page)466482{467483 struct inode *inode = page_file_mapping(page)->host;468484 struct nfs_page *head, *subreq;···471487 int ret;472488473489try_again:474474- total_bytes = 0;475475-476476- WARN_ON_ONCE(destroy_list);477477-478478- spin_lock(&inode->i_lock);479479-480490 /*481491 * A reference is taken only on the head request which acts as a482492 * reference to the whole page group - the group will not be destroyed483493 * until the head reference is released.484494 */485485- head = nfs_page_find_head_request_locked(NFS_I(inode), page);486486-487487- if (!head) {488488- spin_unlock(&inode->i_lock);495495+ head = nfs_page_find_head_request(page);496496+ if (!head)489497 return NULL;498498+499499+ /* lock the page head first in order to avoid an ABBA inefficiency */500500+ if (!nfs_lock_request(head)) {501501+ ret = nfs_wait_on_request(head);502502+ nfs_release_request(head);503503+ if (ret < 0)504504+ return ERR_PTR(ret);505505+ goto try_again;490506 }491507492492- /* holding inode lock, so always make a non-blocking call to try the493493- * page group lock */494494- ret = nfs_page_group_lock(head, true);508508+ /* Ensure that nobody removed the request before we locked it */509509+ if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {510510+ nfs_unlock_and_release_request(head);511511+ goto try_again;512512+ }513513+514514+ ret = nfs_page_group_lock(head);495515 if (ret < 0) {496496- spin_unlock(&inode->i_lock);497497-498498- if (!nonblock && ret == -EAGAIN) {499499- nfs_page_group_lock_wait(head);500500- nfs_release_request(head);501501- goto try_again;502502- }503503-504504- nfs_release_request(head);516516+ nfs_unlock_and_release_request(head);505517 return ERR_PTR(ret);506518 }507519508520 /* lock each request in the page group */509509- subreq = head;510510- do {521521+ total_bytes = head->wb_bytes;522522+ for (subreq = head->wb_this_page; subreq != head;523523+ subreq = subreq->wb_this_page) {524524+525525+ if (!kref_get_unless_zero(&subreq->wb_kref))526526+ continue;527527+ while (!nfs_lock_request(subreq)) {528528+ /*529529+ * Unlock page to allow nfs_page_group_sync_on_bit()530530+ * to succeed531531+ */532532+ nfs_page_group_unlock(head);533533+ ret = nfs_wait_on_request(subreq);534534+ if (!ret)535535+ ret = nfs_page_group_lock(head);536536+ if (ret < 0) {537537+ nfs_unroll_locks(inode, head, subreq);538538+ nfs_release_request(subreq);539539+ nfs_unlock_and_release_request(head);540540+ return ERR_PTR(ret);541541+ }542542+ }511543 /*512544 * Subrequests are always contiguous, non overlapping513545 * and in order - but may be repeated (mirrored writes).···534534 } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset ||535535 ((subreq->wb_offset + subreq->wb_bytes) >536536 (head->wb_offset + total_bytes)))) {537537+ nfs_unroll_locks(inode, head, subreq);538538+ nfs_unlock_and_release_request(subreq);537539 nfs_page_group_unlock(head);538538- spin_unlock(&inode->i_lock);540540+ nfs_unlock_and_release_request(head);539541 return ERR_PTR(-EIO);540542 }541541-542542- if (!nfs_lock_request(subreq)) {543543- /* releases page group bit lock and544544- * inode spin lock and all references */545545- ret = nfs_unroll_locks_and_wait(inode, head,546546- subreq, nonblock);547547-548548- if (ret == 0)549549- goto try_again;550550-551551- return ERR_PTR(ret);552552- }553553-554554- subreq = subreq->wb_this_page;555555- } while (subreq != head);543543+ }556544557545 /* Now that all requests are locked, make sure they aren't on any list.558546 * Commit list removal accounting is done after locks are dropped */···561573 head->wb_bytes = total_bytes;562574 }563575564564- /*565565- * prepare head request to be added to new pgio descriptor566566- */567567- nfs_page_group_clear_bits(head);568568-569569- /*570570- * some part of the group was still on the inode list - otherwise571571- * the group wouldn't be involved in async write.572572- * grab a reference for the head request, iff it needs one.573573- */574574- if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))576576+ /* Postpone destruction of this request */577577+ if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {578578+ set_bit(PG_INODE_REF, &head->wb_flags);575579 kref_get(&head->wb_kref);580580+ atomic_long_inc(&NFS_I(inode)->nrequests);581581+ }576582577583 nfs_page_group_unlock(head);578584579579- /* drop lock to clean uprequests on destroy list */580580- spin_unlock(&inode->i_lock);585585+ nfs_destroy_unlinked_subrequests(destroy_list, head, inode);581586582582- nfs_destroy_unlinked_subrequests(destroy_list, head);587587+ /* Did we lose a race with nfs_inode_remove_request()? */588588+ if (!(PagePrivate(page) || PageSwapCache(page))) {589589+ nfs_unlock_and_release_request(head);590590+ return NULL;591591+ }583592584584- /* still holds ref on head from nfs_page_find_head_request_locked593593+ /* still holds ref on head from nfs_page_find_head_request585594 * and still has lock on head from lock loop */586595 return head;587596}588597589598static void nfs_write_error_remove_page(struct nfs_page *req)590599{591591- nfs_unlock_request(req);592600 nfs_end_page_writeback(req);593601 generic_error_remove_page(page_file_mapping(req->wb_page),594602 req->wb_page);···608624 * May return an error if the user signalled nfs_wait_on_request().609625 */610626static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,611611- struct page *page, bool nonblock)627627+ struct page *page)612628{613629 struct nfs_page *req;614630 int ret = 0;615631616616- req = nfs_lock_and_join_requests(page, nonblock);632632+ req = nfs_lock_and_join_requests(page);617633 if (!req)618634 goto out;619635 ret = PTR_ERR(req);···656672 int ret;657673658674 nfs_pageio_cond_complete(pgio, page_index(page));659659- ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);675675+ ret = nfs_page_async_flush(pgio, page);660676 if (ret == -EAGAIN) {661677 redirty_page_for_writepage(wbc, page);662678 ret = 0;···743759 */744760static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)745761{762762+ struct address_space *mapping = page_file_mapping(req->wb_page);746763 struct nfs_inode *nfsi = NFS_I(inode);747764748765 WARN_ON_ONCE(req->wb_this_page != req);···751766 /* Lock the request! */752767 nfs_lock_request(req);753768754754- spin_lock(&inode->i_lock);755755- if (!nfsi->nrequests &&756756- NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))757757- inode->i_version++;758769 /*759770 * Swap-space should not get truncated. Hence no need to plug the race760771 * with invalidate/truncate.761772 */773773+ spin_lock(&mapping->private_lock);774774+ if (!nfs_have_writebacks(inode) &&775775+ NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) {776776+ spin_lock(&inode->i_lock);777777+ inode->i_version++;778778+ spin_unlock(&inode->i_lock);779779+ }762780 if (likely(!PageSwapCache(req->wb_page))) {763781 set_bit(PG_MAPPED, &req->wb_flags);764782 SetPagePrivate(req->wb_page);765783 set_page_private(req->wb_page, (unsigned long)req);766784 }767767- nfsi->nrequests++;785785+ spin_unlock(&mapping->private_lock);786786+ atomic_long_inc(&nfsi->nrequests);768787 /* this a head request for a page group - mark it as having an769788 * extra reference so sub groups can follow suit.770789 * This flag also informs pgio layer when to bump nrequests when771790 * adding subrequests. */772791 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));773792 kref_get(&req->wb_kref);774774- spin_unlock(&inode->i_lock);775793}776794777795/*···782794 */783795static void nfs_inode_remove_request(struct nfs_page *req)784796{785785- struct inode *inode = d_inode(req->wb_context->dentry);797797+ struct address_space *mapping = page_file_mapping(req->wb_page);798798+ struct inode *inode = mapping->host;786799 struct nfs_inode *nfsi = NFS_I(inode);787800 struct nfs_page *head;788801802802+ atomic_long_dec(&nfsi->nrequests);789803 if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {790804 head = req->wb_head;791805792792- spin_lock(&inode->i_lock);806806+ spin_lock(&mapping->private_lock);793807 if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {794808 set_page_private(head->wb_page, 0);795809 ClearPagePrivate(head->wb_page);796810 clear_bit(PG_MAPPED, &head->wb_flags);797811 }798798- nfsi->nrequests--;799799- spin_unlock(&inode->i_lock);800800- } else {801801- spin_lock(&inode->i_lock);802802- nfsi->nrequests--;803803- spin_unlock(&inode->i_lock);812812+ spin_unlock(&mapping->private_lock);804813 }805814806815 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))···853868 * number of outstanding requests requiring a commit as well as854869 * the MM page stats.855870 *856856- * The caller must hold cinfo->inode->i_lock, and the nfs_page lock.871871+ * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the872872+ * nfs_page lock.857873 */858874void859875nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,···862876{863877 set_bit(PG_CLEAN, &req->wb_flags);864878 nfs_list_add_request(req, dst);865865- cinfo->mds->ncommit++;879879+ atomic_long_inc(&cinfo->mds->ncommit);866880}867881EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);868882···882896void883897nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)884898{885885- spin_lock(&cinfo->inode->i_lock);899899+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);886900 nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);887887- spin_unlock(&cinfo->inode->i_lock);901901+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);888902 if (req->wb_page)889903 nfs_mark_page_unstable(req->wb_page, cinfo);890904}···908922 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))909923 return;910924 nfs_list_remove_request(req);911911- cinfo->mds->ncommit--;925925+ atomic_long_dec(&cinfo->mds->ncommit);912926}913927EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);914928···953967 WB_RECLAIMABLE);954968}955969956956-/* Called holding inode (/cinfo) lock */970970+/* Called holding the request lock on @req */957971static void958972nfs_clear_request_commit(struct nfs_page *req)959973{···962976 struct nfs_commit_info cinfo;963977964978 nfs_init_cinfo_from_inode(&cinfo, inode);979979+ mutex_lock(&NFS_I(inode)->commit_mutex);965980 if (!pnfs_clear_request_commit(req, &cinfo)) {966981 nfs_request_remove_commit_list(req, &cinfo);967982 }983983+ mutex_unlock(&NFS_I(inode)->commit_mutex);968984 nfs_clear_page_commit(req->wb_page);969985 }970986}···10111023remove_req:10121024 nfs_inode_remove_request(req);10131025next:10141014- nfs_unlock_request(req);10151026 nfs_end_page_writeback(req);10161027 nfs_release_request(req);10171028 }···10221035unsigned long10231036nfs_reqs_to_commit(struct nfs_commit_info *cinfo)10241037{10251025- return cinfo->mds->ncommit;10381038+ return atomic_long_read(&cinfo->mds->ncommit);10261039}1027104010281028-/* cinfo->inode->i_lock held by caller */10411041+/* NFS_I(cinfo->inode)->commit_mutex held by caller */10291042int10301043nfs_scan_commit_list(struct list_head *src, struct list_head *dst,10311044 struct nfs_commit_info *cinfo, int max)10321045{10331033- struct nfs_page *req, *tmp;10461046+ struct nfs_page *req;10341047 int ret = 0;1035104810361036- list_for_each_entry_safe(req, tmp, src, wb_list) {10371037- if (!nfs_lock_request(req))10381038- continue;10491049+ while(!list_empty(src)) {10501050+ req = list_first_entry(src, struct nfs_page, wb_list);10391051 kref_get(&req->wb_kref);10401040- if (cond_resched_lock(&cinfo->inode->i_lock))10411041- list_safe_reset_next(req, tmp, wb_list);10521052+ if (!nfs_lock_request(req)) {10531053+ int status;10541054+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);10551055+ status = nfs_wait_on_request(req);10561056+ nfs_release_request(req);10571057+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);10581058+ if (status < 0)10591059+ break;10601060+ continue;10611061+ }10421062 nfs_request_remove_commit_list(req, cinfo);10431063 nfs_list_add_request(req, dst);10441064 ret++;10451065 if ((ret == max) && !cinfo->dreq)10461066 break;10671067+ cond_resched();10471068 }10481069 return ret;10491070}···10711076{10721077 int ret = 0;1073107810741074- spin_lock(&cinfo->inode->i_lock);10751075- if (cinfo->mds->ncommit > 0) {10791079+ if (!atomic_long_read(&cinfo->mds->ncommit))10801080+ return 0;10811081+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);10821082+ if (atomic_long_read(&cinfo->mds->ncommit) > 0) {10761083 const int max = INT_MAX;1077108410781085 ret = nfs_scan_commit_list(&cinfo->mds->list, dst,10791086 cinfo, max);10801087 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);10811088 }10821082- spin_unlock(&cinfo->inode->i_lock);10891089+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);10831090 return ret;10841091}10851092···11021105 unsigned int end;11031106 int error;1104110711051105- if (!PagePrivate(page))11061106- return NULL;11071107-11081108 end = offset + bytes;11091109- spin_lock(&inode->i_lock);1110110911111111- for (;;) {11121112- req = nfs_page_find_head_request_locked(NFS_I(inode), page);11131113- if (req == NULL)11141114- goto out_unlock;11101110+ req = nfs_lock_and_join_requests(page);11111111+ if (IS_ERR_OR_NULL(req))11121112+ return req;1115111311161116- /* should be handled by nfs_flush_incompatible */11171117- WARN_ON_ONCE(req->wb_head != req);11181118- WARN_ON_ONCE(req->wb_this_page != req);11191119-11201120- rqend = req->wb_offset + req->wb_bytes;11211121- /*11221122- * Tell the caller to flush out the request if11231123- * the offsets are non-contiguous.11241124- * Note: nfs_flush_incompatible() will already11251125- * have flushed out requests having wrong owners.11261126- */11271127- if (offset > rqend11281128- || end < req->wb_offset)11291129- goto out_flushme;11301130-11311131- if (nfs_lock_request(req))11321132- break;11331133-11341134- /* The request is locked, so wait and then retry */11351135- spin_unlock(&inode->i_lock);11361136- error = nfs_wait_on_request(req);11371137- nfs_release_request(req);11381138- if (error != 0)11391139- goto out_err;11401140- spin_lock(&inode->i_lock);11411141- }11141114+ rqend = req->wb_offset + req->wb_bytes;11151115+ /*11161116+ * Tell the caller to flush out the request if11171117+ * the offsets are non-contiguous.11181118+ * Note: nfs_flush_incompatible() will already11191119+ * have flushed out requests having wrong owners.11201120+ */11211121+ if (offset > rqend || end < req->wb_offset)11221122+ goto out_flushme;1142112311431124 /* Okay, the request matches. Update the region */11441125 if (offset < req->wb_offset) {···11271152 req->wb_bytes = end - req->wb_offset;11281153 else11291154 req->wb_bytes = rqend - req->wb_offset;11301130-out_unlock:11311131- if (req)11321132- nfs_clear_request_commit(req);11331133- spin_unlock(&inode->i_lock);11341155 return req;11351156out_flushme:11361136- spin_unlock(&inode->i_lock);11371137- nfs_release_request(req);11571157+ /*11581158+ * Note: we mark the request dirty here because11591159+ * nfs_lock_and_join_requests() cannot preserve11601160+ * commit flags, so we have to replay the write.11611161+ */11621162+ nfs_mark_request_dirty(req);11631163+ nfs_unlock_and_release_request(req);11381164 error = nfs_wb_page(inode, page);11391139-out_err:11401140- return ERR_PTR(error);11651165+ return (error < 0) ? ERR_PTR(error) : NULL;11411166}1142116711431168/*···12021227 l_ctx = req->wb_lock_context;12031228 do_flush = req->wb_page != page ||12041229 !nfs_match_open_context(req->wb_context, ctx);12051205- /* for now, flush if more than 1 request in page_group */12061206- do_flush |= req->wb_this_page != req;12071230 if (l_ctx && flctx &&12081231 !(list_empty_careful(&flctx->flc_posix) &&12091232 list_empty_careful(&flctx->flc_flock))) {···13851412{13861413 nfs_mark_request_dirty(req);13871414 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);13881388- nfs_unlock_request(req);13891415 nfs_end_page_writeback(req);13901416 nfs_release_request(req);13911417}···19061934 int ret = 0;1907193519081936 /* no commits means nothing needs to be done */19091909- if (!nfsi->commit_info.ncommit)19371937+ if (!atomic_long_read(&nfsi->commit_info.ncommit))19101938 return ret;1911193919121940 if (wbc->sync_mode == WB_SYNC_NONE) {···1987201519882016 /* blocking call to cancel all requests and join to a single (head)19892017 * request */19901990- req = nfs_lock_and_join_requests(page, false);20182018+ req = nfs_lock_and_join_requests(page);1991201919922020 if (IS_ERR(req)) {19932021 ret = PTR_ERR(req);
···844844}845845EXPORT_SYMBOL_GPL(xprt_lookup_rqst);846846847847+/**848848+ * xprt_pin_rqst - Pin a request on the transport receive list849849+ * @req: Request to pin850850+ *851851+ * Caller must ensure this is atomic with the call to xprt_lookup_rqst()852852+ * so should be holding the xprt transport lock.853853+ */854854+void xprt_pin_rqst(struct rpc_rqst *req)855855+{856856+ set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);857857+}858858+859859+/**860860+ * xprt_unpin_rqst - Unpin a request on the transport receive list861861+ * @req: Request to pin862862+ *863863+ * Caller should be holding the xprt transport lock.864864+ */865865+void xprt_unpin_rqst(struct rpc_rqst *req)866866+{867867+ struct rpc_task *task = req->rq_task;868868+869869+ clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);870870+ if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))871871+ wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);872872+}873873+874874+static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)875875+__must_hold(&req->rq_xprt->recv_lock)876876+{877877+ struct rpc_task *task = req->rq_task;878878+879879+ if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {880880+ spin_unlock(&req->rq_xprt->recv_lock);881881+ set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);882882+ wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,883883+ TASK_UNINTERRUPTIBLE);884884+ clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);885885+ spin_lock(&req->rq_xprt->recv_lock);886886+ }887887+}888888+847889static void xprt_update_rtt(struct rpc_task *task)848890{849891 struct rpc_rqst *req = task->tk_rqstp;···1008966 /*1009967 * Add to the list only if we're expecting a reply1010968 */10111011- spin_lock_bh(&xprt->transport_lock);1012969 /* Update the softirq receive buffer */1013970 memcpy(&req->rq_private_buf, &req->rq_rcv_buf,1014971 sizeof(req->rq_private_buf));1015972 /* Add request to the receive list */973973+ spin_lock(&xprt->recv_lock);1016974 list_add_tail(&req->rq_list, &xprt->recv);10171017- spin_unlock_bh(&xprt->transport_lock);975975+ spin_unlock(&xprt->recv_lock);1018976 xprt_reset_majortimeo(req);1019977 /* Turn off autodisconnect */1020978 del_singleshot_timer_sync(&xprt->timer);···13291287 task->tk_ops->rpc_count_stats(task, task->tk_calldata);13301288 else if (task->tk_client)13311289 rpc_count_iostats(task, task->tk_client->cl_metrics);12901290+ spin_lock(&xprt->recv_lock);12911291+ if (!list_empty(&req->rq_list)) {12921292+ list_del(&req->rq_list);12931293+ xprt_wait_on_pinned_rqst(req);12941294+ }12951295+ spin_unlock(&xprt->recv_lock);13321296 spin_lock_bh(&xprt->transport_lock);13331297 xprt->ops->release_xprt(xprt, task);13341298 if (xprt->ops->release_request)13351299 xprt->ops->release_request(task);13361336- if (!list_empty(&req->rq_list))13371337- list_del(&req->rq_list);13381300 xprt->last_used = jiffies;13391301 xprt_schedule_autodisconnect(xprt);13401302 spin_unlock_bh(&xprt->transport_lock);···1364131813651319 spin_lock_init(&xprt->transport_lock);13661320 spin_lock_init(&xprt->reserve_lock);13211321+ spin_lock_init(&xprt->recv_lock);1367132213681323 INIT_LIST_HEAD(&xprt->free);13691324 INIT_LIST_HEAD(&xprt->recv);
+4-4
net/sunrpc/xprtrdma/rpc_rdma.c
···10511051 * RPC completion while holding the transport lock to ensure10521052 * the rep, rqst, and rq_task pointers remain stable.10531053 */10541054- spin_lock_bh(&xprt->transport_lock);10541054+ spin_lock(&xprt->recv_lock);10551055 rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);10561056 if (!rqst)10571057 goto out_norqst;···11361136 xprt_release_rqst_cong(rqst->rq_task);1137113711381138 xprt_complete_rqst(rqst->rq_task, status);11391139- spin_unlock_bh(&xprt->transport_lock);11391139+ spin_unlock(&xprt->recv_lock);11401140 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",11411141 __func__, xprt, rqst, status);11421142 return;···11871187 r_xprt->rx_stats.bad_reply_count++;11881188 goto out;1189118911901190-/* The req was still available, but by the time the transport_lock11901190+/* The req was still available, but by the time the recv_lock11911191 * was acquired, the rqst and task had been released. Thus the RPC11921192 * has already been terminated.11931193 */11941194out_norqst:11951195- spin_unlock_bh(&xprt->transport_lock);11951195+ spin_unlock(&xprt->recv_lock);11961196 rpcrdma_buffer_put(req);11971197 dprintk("RPC: %s: race, no rqst left for req %p\n",11981198 __func__, req);